Search | arXiv e-print repository

Showing 1–48 of 48 results for author: Kim, S H

Searching in archive cs. Sorted by announcement date (newest first), 50 results per page.
1. arXiv:2411.10761 [pdf, other] cs.CL
   Can Generic LLMs Help Analyze Child-adult Interactions Involving Children with Autism in Clinical Observation?
   Authors: Tiantian Feng, Anfeng Xu, Rimita Lahiri, Helen Tager-Flusberg, So Hyun Kim, Somer Bishop, Catherine Lord, Shrikanth Narayanan
   Abstract: Large Language Models (LLMs) have shown significant potential in understanding human communication and interaction. However, their performance in the domain of child-inclusive interactions, including in clinical settings, remains less explored. In this work, we evaluate generic LLMs' ability to analyze child-adult dyadic interactions in a clinically relevant context involving children with ASD. Specifically, we explore LLMs in performing four tasks: classifying child-adult utterances, predicting engaged activities, recognizing language skills, and understanding traits that are clinically relevant. Our evaluation shows that generic LLMs are highly capable of analyzing long and complex conversations in clinical observation sessions, often surpassing the performance of non-expert human evaluators. The results show their potential to segment interactions of interest, assist in language skills evaluation, identify engaged activities, and offer clinically relevant context for assessments.
   Submitted 16 November, 2024; originally announced November 2024.
   Comments: GenAI for Health Workshop, NeurIPS 2024
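The first of the four tasks in the abstract above (child vs. adult utterance classification) lends itself to a small zero-shot sketch. The `llm` completion callable, prompt wording, and label parsing below are illustrative assumptions, not the authors' actual protocol:

```python
# Sketch of child/adult utterance classification via zero-shot prompting,
# assuming a generic text-completion callable `llm`. Prompt and labels are
# illustrative, not taken from the paper.
from typing import Callable, List

def classify_utterances(utterances: List[str], llm: Callable[[str], str]) -> List[str]:
    """Label each transcript utterance as CHILD or ADULT."""
    labels = []
    for utt in utterances:
        prompt = (
            "You are analyzing a transcript of a child-adult interaction.\n"
            f"Utterance: \"{utt}\"\n"
            "Answer with exactly one word, CHILD or ADULT, indicating the speaker."
        )
        reply = llm(prompt).strip().upper()
        labels.append("CHILD" if "CHILD" in reply else "ADULT")
    return labels
```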
2. arXiv:2409.13606 [pdf, other] cs.CV cs.LG
   Towards Child-Inclusive Clinical Video Understanding for Autism Spectrum Disorder
   Authors: Aditya Kommineni, Digbalay Bose, Tiantian Feng, So Hyun Kim, Helen Tager-Flusberg, Somer Bishop, Catherine Lord, Sudarsana Kadiri, Shrikanth Narayanan
   Abstract: Clinical videos in the context of Autism Spectrum Disorder are often long-form interactions between children and caregivers/clinical professionals, encompassing complex verbal and non-verbal behaviors. Objective analyses of these videos could provide clinicians and researchers with nuanced insights into the behavior of children with Autism Spectrum Disorder. Manually coding these videos is time-consuming and requires a high level of domain expertise; the ability to capture these interactions computationally can therefore augment the manual effort and support the diagnostic procedure. In this work, we investigate the use of foundation models across three modalities, speech, video, and text, to analyze child-focused interaction sessions. We propose a unified methodology that combines multiple modalities by using large language models as reasoning agents. We evaluate their performance on two tasks with different information granularity: activity recognition and abnormal behavior detection. We find that the proposed multimodal pipeline is robust to modality-specific limitations and improves on unimodal settings for clinical video analysis.
   Submitted 20 September, 2024; originally announced September 2024.
   Comments: 5 pages, 2 figures, 2 tables
3. arXiv:2409.12443 [pdf, other] cs.RO
   A Neural Network-based Framework for Fast and Smooth Posture Reconstruction of a Soft Continuum Arm
   Authors: Tixian Wang, Heng-Sheng Chang, Seung Hyun Kim, Jiamiao Guo, Ugur Akcal, Benjamin Walt, Darren Biskup, Udit Halder, Girish Krishnan, Girish Chowdhary, Mattia Gazzola, Prashant G. Mehta
   Abstract: A neural network-based framework is developed and experimentally demonstrated for the problem of estimating the shape of a soft continuum arm (SCA) from noisy measurements of the pose at a finite number of locations along the length of the arm. The neural network takes these measurements as input and produces as output a finite-dimensional approximation of the strain, which is then used to reconstruct the infinite-dimensional smooth posture. This problem is important for various soft robotic applications. It is challenging because the arm's flexibility makes reconstructing the continuous posture and strains an infinite-dimensional problem, and past solutions are consequently computationally intensive. The proposed fast smooth reconstruction method is shown to be five orders of magnitude faster while having comparable accuracy. The framework is evaluated on two testbeds: a simulated octopus muscular arm and a physical BR2 pneumatic soft manipulator.
   Submitted 18 September, 2024; originally announced September 2024.
   Comments: 6 pages plus references, 5 figures, submitted to ICRA 2025
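The pipeline in this abstract (pose measurements, to a finite-dimensional strain estimate, to a smooth posture) can be illustrated with a minimal numpy sketch. The one-layer "network" below is untrained with random weights, and the cosine curvature basis is an assumption; both stand in for the paper's learned model:

```python
# Illustrative sketch: map a few noisy pose measurements to coefficients of a
# low-dimensional strain (curvature) basis, then integrate the curvature to
# recover a smooth planar posture. Weights are random stand-ins, not a
# trained model.
import numpy as np

def reconstruct_posture(pose_meas: np.ndarray, n_basis: int = 4, n_pts: int = 200):
    rng = np.random.default_rng(0)
    W = rng.normal(scale=0.1, size=(n_basis, pose_meas.size))  # stand-in weights
    coeffs = W @ pose_meas                      # finite-dimensional strain estimate
    s = np.linspace(0.0, 1.0, n_pts)            # arc length along the arm
    basis = np.stack([np.cos(np.pi * k * s) for k in range(n_basis)])
    kappa = coeffs @ basis                      # smooth curvature profile
    theta = np.cumsum(kappa) * (s[1] - s[0])    # integrate curvature -> heading
    x = np.cumsum(np.cos(theta)) * (s[1] - s[0])
    y = np.cumsum(np.sin(theta)) * (s[1] - s[0])
    return np.stack([x, y], axis=1)             # smooth posture samples

posture = reconstruct_posture(np.array([0.1, -0.05, 0.2, 0.0, 0.3, 0.1]))
```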
4. arXiv:2408.08577 [pdf, other] cond-mat.soft cs.CE physics.bio-ph physics.chem-ph
   Mechanistic Modeling of Lipid Nanoparticle Formation for the Delivery of Nucleic Acid Therapeutics
   Authors: Pavan K. Inguva, Saikat Mukherjee, Pierre J. Walker, Mona A. Kanso, Jie Wang, Yanchen Wu, Vico Tenberg, Srimanta Santra, Shalini Singh, Shin Hyuk Kim, Bernhardt L. Trout, Martin Z. Bazant, Allan S. Myerson, Richard D. Braatz
   Abstract: Nucleic acids such as mRNA have emerged as a promising therapeutic modality capable of addressing a wide range of diseases. Lipid nanoparticles (LNPs), used as the delivery platform for nucleic acids in the COVID-19 vaccines, have received much attention. While modern manufacturing processes, which rapidly mix an organic stream containing the lipids with an aqueous stream containing the nucleic acids, are conceptually straightforward, detailed understanding of LNP formation and structure is still limited, and scale-up can be challenging. Mathematical and computational methods are a promising avenue for deepening scientific understanding of the LNP formation process and facilitating improved process development and control. This article describes strategies for the mechanistic modeling of LNP formation, starting with approaches for estimating and predicting important physicochemical properties of the various species, such as diffusivities and solubilities. Subsequently, a framework is outlined for constructing mechanistic models of reactor- and particle-scale processes. Insights gained from the various models are mapped back to product quality attributes and process insights. Lastly, the use of the models to guide the development of advanced process control and optimization strategies is discussed.
   Submitted 16 August, 2024; originally announced August 2024.
   Comments: 67 pages, 10 figures
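For the diffusivity estimates mentioned in this abstract, one standard starting point is the Stokes–Einstein relation, D = kT / (6πηr). The sketch below applies it with placeholder values (particle radius, solvent viscosity) that are not taken from the paper:

```python
# Estimating a diffusion coefficient with the Stokes-Einstein relation,
# a common first step for the physicochemical property estimates mentioned
# above. Radius and viscosity below are placeholder values, not paper data.
import math

def stokes_einstein_diffusivity(T: float, eta: float, r: float) -> float:
    """Diffusion coefficient (m^2/s) for a sphere of radius r (m) in a fluid
    of viscosity eta (Pa*s) at temperature T (K)."""
    k_B = 1.380649e-23  # Boltzmann constant, J/K
    return k_B * T / (6.0 * math.pi * eta * r)

# e.g. a ~50 nm particle in a water-like solvent at room temperature
D = stokes_einstein_diffusivity(T=298.15, eta=1.0e-3, r=50e-9)  # ~4.4e-12 m^2/s
```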
5. arXiv:2408.07648 [pdf, other] cs.CV cs.CL
   See It All: Contextualized Late Aggregation for 3D Dense Captioning
   Authors: Minjung Kim, Hyung Suk Lim, Seung Hwan Kim, Soonyoung Lee, Bumsoo Kim, Gunhee Kim
   Abstract: 3D dense captioning is the task of localizing objects in a 3D scene and generating a descriptive sentence for each object. Recent approaches in 3D dense captioning have adopted transformer encoder-decoder frameworks from object detection to build an end-to-end pipeline without hand-crafted components. However, these approaches struggle with conflicting objectives: a single query attention must simultaneously view both the tightly localized object regions and the contextual environment. To overcome this challenge, we introduce SIA (See-It-All), a transformer pipeline that performs 3D dense captioning with a novel paradigm called late aggregation. SIA simultaneously decodes two sets of queries: an instance query and a context query. The instance query focuses on localization and object attribute descriptions, while the context query captures regions of interest covering relationships between multiple objects or with the global scene; the two are aggregated afterwards (i.e., late aggregation) via simple distance-based measures. To further enhance the quality of contextualized caption generation, we design a novel aggregator that generates a fully informed caption based on the surrounding context, the global environment, and object instances. Extensive experiments on the two most widely used 3D dense captioning datasets demonstrate that our proposed method achieves a significant improvement over prior methods.
   Submitted 14 August, 2024; originally announced August 2024.
   Comments: Accepted to ACL 2024 Findings
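A rough sketch of the "simple distance-based" pairing step named in this abstract: attach each context query to its nearest instance query by predicted 3D center distance. The shapes and the nearest-neighbor rule are assumptions for illustration; the paper's actual aggregator is a learned module:

```python
# Hedged sketch of distance-based late aggregation: group context queries
# under the instance query whose predicted 3D center is closest.
import numpy as np

def late_aggregate(instance_centers: np.ndarray, context_centers: np.ndarray):
    """instance_centers: (n_inst, 3); context_centers: (n_ctx, 3).
    Returns {instance index: array of context-query indices assigned to it}."""
    # pairwise distances, shape (n_ctx, n_inst)
    d = np.linalg.norm(context_centers[:, None, :] - instance_centers[None, :, :], axis=-1)
    nearest_instance = d.argmin(axis=1)  # each context query -> one instance
    return {i: np.where(nearest_instance == i)[0] for i in range(len(instance_centers))}
```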
6. arXiv:2404.07622 [pdf, other] cs.CV cs.CL
   Language Models Meet Anomaly Detection for Better Interpretability and Generalizability
   Authors: Jun Li, Su Hwan Kim, Philip Müller, Lina Felsner, Daniel Rueckert, Benedikt Wiestler, Julia A. Schnabel, Cosmin I. Bercea
   Abstract: This research explores the integration of language models and unsupervised anomaly detection in medical imaging, addressing two key questions: (1) Can language models enhance the interpretability of anomaly detection maps? (2) Can anomaly maps improve the generalizability of language models in open-set anomaly detection tasks? To investigate these questions, we introduce a new dataset for multi-image visual question answering on brain magnetic resonance images encompassing multiple conditions. We propose KQ-Former (Knowledge Querying Transformer), designed to optimally align visual and textual information in limited-sample contexts. Our model achieves 60.81% accuracy on closed questions, covering disease classification and severity across 15 different classes. For open questions, KQ-Former demonstrates a 70% improvement over the baseline with a BLEU-4 score of 0.41, and achieves the highest entailment ratios (up to 71.9%) and lowest contradiction ratios (down to 10.0%) among various natural language inference models. Furthermore, integrating anomaly maps yields an 18% accuracy increase in detecting open-set anomalies, enhancing the language model's generalizability to previously unseen medical conditions. The code and dataset are available at https://github.com/compai-lab/miccai-2024-junli?tab=readme-ov-file
   Submitted 23 July, 2024; v1 submitted 11 April, 2024; originally announced April 2024.
   Comments: 13 pages, 7 figures. 5th International Workshop on Multiscale Multimodal Medical Imaging (MMMI 2024)
7. arXiv:2403.04787 [pdf, other] cs.CL cs.AI
   Ever-Evolving Memory by Blending and Refining the Past
   Authors: Seo Hyun Kim, Keummin Ka, Yohan Jo, Seung-won Hwang, Dongha Lee, Jinyoung Yeo
   Abstract: For a human-like chatbot, constructing a long-term memory is crucial. However, current large language models often lack this capability, leading to missed or redundantly re-requested user information and diminished conversation quality. Effective memory construction requires seamlessly connecting past and present information, along with the ability to forget obstructive information. To address these challenges, we propose CREEM, a novel memory system for long-term conversation. Improving upon existing approaches that construct memory from the current session alone, CREEM blends past memories during memory formation, and introduces a refining process to handle redundant or outdated information. Unlike traditional paradigms, we view responding and memory construction as inseparable tasks: the blending process that creates new memories also serves as a reasoning step for response generation by making the connection between past and present explicit. Through evaluation, we demonstrate that CREEM improves both memory and response quality in multi-session personalized dialogues.
   Submitted 7 April, 2024; v1 submitted 3 March, 2024; originally announced March 2024.
   Comments: 17 pages, 4 figures, 7 tables
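The blend-then-refine flow described in this abstract can be caricatured with a toy topic-keyed store: a new observation is recorded in light of past entries, and refinement drops entries the new session supersedes. The data model and the supersede-by-topic rule are illustrative assumptions, not CREEM's actual algorithm:

```python
# Toy sketch of blend-then-refine memory maintenance; all rules here are
# illustrative assumptions, not the paper's method.
from dataclasses import dataclass

@dataclass
class Memory:
    topic: str
    fact: str
    session: int

def blend_and_refine(store: list[Memory], new: Memory) -> list[Memory]:
    # refine: drop outdated entries on the same topic that the new fact supersedes
    refined = [m for m in store if m.topic != new.topic]
    refined.append(new)  # blend: the new memory is formed in light of the past
    return refined

store: list[Memory] = []
store = blend_and_refine(store, Memory("pet", "User has a dog named Max", 1))
store = blend_and_refine(store, Memory("pet", "User adopted a second dog", 2))
```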
8. arXiv:2312.13822 [pdf, other] cs.CV
   Universal Noise Annotation: Unveiling the Impact of Noisy Annotation on Object Detection
   Authors: Kwangrok Ryoo, Yeonsik Jo, Seungjun Lee, Mira Kim, Ahra Jo, Seung Hwan Kim, Seungryong Kim, Soonyoung Lee
   Abstract: For object detection with noisy labels, it is important to consider not only categorization noise, as in image classification, but also localization noise, missing annotations, and bogus bounding boxes. However, previous studies have only addressed certain types of noise (e.g., localization or categorization). In this paper, we propose Universal-Noise Annotation (UNA), a more practical setting that encompasses all types of noise that can occur in object detection, and analyze how UNA affects detector performance. We analyze the development direction of previous detection algorithms and examine the factors that affect the robustness of detection model training. We open-source the code for injecting UNA into a dataset, and all training logs and weights are also shared.
   Submitted 21 December, 2023; originally announced December 2023.
   Comments: appendix and code: https://github.com/Ryoo72/UNA
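A hedged sketch of injecting the four noise types named in this abstract (category flips, box jitter, missing annotations, bogus boxes) into a COCO-style annotation list. The rates and jitter scale are illustrative; the authors' repo (https://github.com/Ryoo72/UNA) has the real implementation:

```python
# Illustrative UNA-style noise injection into COCO-style annotations
# (dicts with "category_id" and "bbox" = [x, y, w, h]); parameters are toy values.
import random

def inject_noise(anns, num_classes, p_flip=0.1, p_drop=0.1, p_bogus=0.1, jitter=0.1):
    noisy = []
    for a in anns:
        if random.random() < p_drop:
            continue                      # missing annotation
        a = dict(a)
        if random.random() < p_flip:      # categorization noise
            a["category_id"] = random.randrange(num_classes)
        x, y, w, h = a["bbox"]            # localization noise: jitter the box
        a["bbox"] = [x + random.uniform(-jitter, jitter) * w,
                     y + random.uniform(-jitter, jitter) * h,
                     w * (1 + random.uniform(-jitter, jitter)),
                     h * (1 + random.uniform(-jitter, jitter))]
        noisy.append(a)
    if random.random() < p_bogus:         # bogus bounding box
        noisy.append({"category_id": random.randrange(num_classes),
                      "bbox": [random.uniform(0, 400), random.uniform(0, 400), 50, 50]})
    return noisy
```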
9. arXiv:2312.12661 [pdf, other] cs.CV
   Misalign, Contrast then Distill: Rethinking Misalignments in Language-Image Pretraining
   Authors: Bumsoo Kim, Yeonsik Jo, Jinhyung Kim, Seung Hwan Kim
   Abstract: Contrastive Language-Image Pretraining has emerged as a prominent approach for training vision and text encoders with uncurated image-text pairs from the web. To enhance data efficiency, recent efforts have introduced additional supervision terms involving randomly augmented views of the image. However, because the image augmentation process is unaware of its text counterpart, it can cause varying degrees of image-text misalignment during training. Prior methods either disregarded this discrepancy or introduced external models to mitigate its impact. In contrast, we propose a novel metric learning approach that capitalizes on these misalignments as an additional training source, which we term "Misalign, Contrast then Distill (MCD)". Unlike previous methods that treat augmented images and their text counterparts as simple positive pairs, MCD predicts the continuous scale of misalignment caused by the augmentation. Our extensive experimental results show that MCD achieves state-of-the-art transferability on multiple classification and retrieval downstream datasets.
   Submitted 19 December, 2023; originally announced December 2023.
   Comments: ICCV 2023
10. arXiv:2312.12659 [pdf, other] cs.CV
    Expediting Contrastive Language-Image Pretraining via Self-distilled Encoders
    Authors: Bumsoo Kim, Jinhyung Kim, Yeonsik Jo, Seung Hwan Kim
    Abstract: Recent advances in vision-language pretraining (VLP) have been largely attributed to large-scale data collected from the web. However, uncurated datasets contain weakly correlated image-text pairs, causing data inefficiency. To address the issue, knowledge distillation has been explored, at the expense of extra image and text momentum encoders, to generate teaching signals for misaligned image-text pairs. In this paper, our goal is to resolve the misalignment problem with an efficient distillation framework. To this end, we propose ECLIPSE: Expediting Contrastive Language-Image Pretraining with Self-distilled Encoders. ECLIPSE features a distinctive distillation architecture in which a shared text encoder is used by both an online image encoder and a momentum image encoder. This design choice lets the distillation operate within a unified projected space of text embeddings, resulting in better performance. Building on the unified text embedding space, ECLIPSE compensates for the additional computational cost of the momentum image encoder by expediting the online image encoder. Through extensive experiments, we validate that there is a sweet spot between expedition and distillation where the partial view from the expedited online image encoder interacts complementarily with the momentum teacher. As a result, ECLIPSE outperforms its counterparts while achieving substantial acceleration in inference speed.
    Submitted 19 December, 2023; originally announced December 2023.
    Comments: AAAI 2024

11. arXiv:2310.15263 [pdf, other] q-bio.NC cs.LG
    One-hot Generalized Linear Model for Switching Brain State Discovery
    Authors: Chengrui Li, Soon Ho Kim, Chris Rodgers, Hannah Choi, Anqi Wu
    Abstract: Exposing meaningful and interpretable neural interactions is critical to understanding neural circuits. Neural interactions inferred from neural signals primarily reflect functional interactions. In a long experiment, subject animals may experience different stages defined by the experiment, stimuli, or behavioral states, and hence functional interactions can change over time. To model dynamically changing functional interactions, prior work employs state-switching generalized linear models with hidden Markov models (i.e., HMM-GLMs). However, we argue they lack biological plausibility, as functional interactions are shaped and confined by the underlying anatomical connectome. Here, we propose a novel prior-informed state-switching GLM. We introduce both a Gaussian prior and a one-hot prior over the GLM in each state; the priors are learnable. We show that the learned prior captures the state-constant interaction, shedding light on the underlying anatomical connectome and revealing more likely physical neuron interactions. The state-dependent interaction modeled by each GLM offers traceability, capturing functional variations across multiple brain states. Our methods effectively recover true interaction structures in simulated data, achieve the highest predictive likelihood on real neural datasets, and render interaction structures and hidden states more interpretable when applied to real neural data.
    Submitted 23 October, 2023; originally announced October 2023.
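The HMM-GLM setup this abstract builds on can be simulated in a few lines: Poisson spike counts whose interaction weights switch with a hidden Markov state. All sizes and values below are toy choices, and the paper's learnable Gaussian and one-hot priors are not modeled:

```python
# Minimal simulation of a state-switching (HMM) Poisson GLM; toy values only.
import numpy as np

rng = np.random.default_rng(1)
n_neurons, T = 5, 300
W = np.stack([rng.normal(scale=0.3, size=(n_neurons, n_neurons))
              for _ in range(2)])               # one interaction matrix per state
P = np.array([[0.95, 0.05], [0.05, 0.95]])      # HMM transition matrix

state = 0
spikes = np.zeros((T, n_neurons))
spikes[0] = rng.poisson(1.0, n_neurons)
states = [state]
for t in range(1, T):
    state = rng.choice(2, p=P[state])           # hidden Markov state switch
    rate = np.exp(np.clip(W[state] @ spikes[t - 1], -5, 2))  # GLM link: log-rate
    spikes[t] = rng.poisson(rate)               # state-dependent functional coupling
    states.append(state)
```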
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.08221">arXiv:2310.08221</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2310.08221">pdf</a>, <a href="https://arxiv.org/format/2310.08221">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> SimCKP: Simple Contrastive Learning of Keyphrase Representations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Choi%2C+M">Minseok Choi</a>, <a href="/search/cs?searchtype=author&amp;query=Gwak%2C+C">Chaeheon Gwak</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+S">Seho Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+S+H">Si Hyeong Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Choo%2C+J">Jaegul Choo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.08221v1-abstract-short" style="display: inline;"> Keyphrase generation (KG) aims to generate a set of summarizing words or phrases given a source document, while keyphrase extraction (KE) aims to identify them from the text. Because the search space is much smaller in KE, it is often combined with KG to predict keyphrases that may or may not exist in the corresponding document. However, current unified approaches adopt sequence labeling and maxim&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.08221v1-abstract-full').style.display = 'inline'; document.getElementById('2310.08221v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.08221v1-abstract-full" style="display: none;"> Keyphrase generation (KG) aims to generate a set of summarizing words or phrases given a source document, while keyphrase extraction (KE) aims to identify them from the text. Because the search space is much smaller in KE, it is often combined with KG to predict keyphrases that may or may not exist in the corresponding document. However, current unified approaches adopt sequence labeling and maximization-based generation that primarily operate at a token level, falling short in observing and scoring keyphrases as a whole. In this work, we propose SimCKP, a simple contrastive learning framework that consists of two stages: 1) An extractor-generator that extracts keyphrases by learning context-aware phrase-level representations in a contrastive manner while also generating keyphrases that do not appear in the document; 2) A reranker that adapts scores for each generated phrase by likewise aligning their representations with the corresponding document. Experimental results on multiple benchmark datasets demonstrate the effectiveness of our proposed approach, which outperforms the state-of-the-art models by a significant margin. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.08221v1-abstract-full').style.display = 'none'; document.getElementById('2310.08221v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to Findings of EMNLP 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.01961">arXiv:2309.01961</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2309.01961">pdf</a>, <a href="https://arxiv.org/format/2309.01961">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> NICE: CVPR 2023 Challenge on Zero-shot Image Captioning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Kim%2C+T">Taehoon Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Ahn%2C+P">Pyunghwan Ahn</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+S">Sangyun Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+S">Sihaeng Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Marsden%2C+M">Mark Marsden</a>, <a href="/search/cs?searchtype=author&amp;query=Sala%2C+A">Alessandra Sala</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+S+H">Seung Hwan Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Han%2C+B">Bohyung Han</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+K+M">Kyoung Mu Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+H">Honglak Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Bae%2C+K">Kyounghoon Bae</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+X">Xiangyu Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Gao%2C+Y">Yi Gao</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+H">Hailiang Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+Y">Yang Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+W">Weili Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Lu%2C+J">Jianfeng Lu</a>, <a href="/search/cs?searchtype=author&amp;query=Oh%2C+Y">Youngtaek Oh</a>, <a href="/search/cs?searchtype=author&amp;query=Cho%2C+J+W">Jae Won Cho</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+D">Dong-jin Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Kweon%2C+I+S">In So Kweon</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+J">Junmo Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+W">Wooyoung Kang</a>, <a href="/search/cs?searchtype=author&amp;query=Jhoo%2C+W+Y">Won Young Jhoo</a>, <a href="/search/cs?searchtype=author&amp;query=Roh%2C+B">Byungseok Roh</a> , et al. 
(17 additional authors not shown)
Abstract: In this report, we introduce the NICE (New frontiers for zero-shot Image Captioning Evaluation) project and share the results and outcomes of the 2023 challenge. This project is designed to challenge the computer vision community to develop robust image captioning models that advance the state-of-the-art both in terms of accuracy and fairness. Through the challenge, the image captioning models were tested using a new evaluation dataset that includes a large variety of visual concepts from many domains. No specific training data was provided for the challenge, so the entries were required to adapt to new types of image descriptions that had not been seen during training. This report includes information on the newly proposed NICE dataset, evaluation methods, challenge results, and technical details of the top-ranking entries. We expect that the outcomes of the challenge will contribute to the improvement of AI models on various vision-language tasks.
Submitted 10 September, 2023; v1 submitted 5 September, 2023; originally announced September 2023.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Tech report, project page https://nice.lgresearch.ai/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.07575">arXiv:2308.07575</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2308.07575">pdf</a>, <a href="https://arxiv.org/format/2308.07575">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Story Visualization by Online Text Augmentation with Context Memory </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Ahn%2C+D">Daechul Ahn</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+D">Daneul Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+G">Gwangmo Song</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+S+H">Seung Hwan Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+H">Honglak Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+D">Dongyeop Kang</a>, <a href="/search/cs?searchtype=author&amp;query=Choi%2C+J">Jonghyun Choi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.07575v2-abstract-short" style="display: inline;"> Story visualization (SV) is a challenging text-to-image generation task for the difficulty of not only rendering visual details from the text descriptions but also encoding a long-term context across multiple sentences. While prior efforts mostly focus on generating a semantically relevant image for each sentence, encoding a context spread across the given paragraph to generate contextually convin&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.07575v2-abstract-full').style.display = 'inline'; document.getElementById('2308.07575v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.07575v2-abstract-full" style="display: none;"> Story visualization (SV) is a challenging text-to-image generation task for the difficulty of not only rendering visual details from the text descriptions but also encoding a long-term context across multiple sentences. While prior efforts mostly focus on generating a semantically relevant image for each sentence, encoding a context spread across the given paragraph to generate contextually convincing images (e.g., with a correct character or with a proper background of the scene) remains a challenge. To this end, we propose a novel memory architecture for the Bi-directional Transformer framework with an online text augmentation that generates multiple pseudo-descriptions as supplementary supervision during training for better generalization to the language variation at inference. 
In extensive experiments on the two popular SV benchmarks, i.e., Pororo-SV and Flintstones-SV, the proposed method significantly outperforms the state of the art in various metrics, including FID, character F1, frame accuracy, BLEU-2/3, and R-precision, with similar or lower computational complexity.
Submitted 19 August, 2023; v1 submitted 15 August, 2023; originally announced August 2023.
Comments: ICCV 2023, Project page: https://dcahn12.github.io/projects/CMOTA/

arXiv:2305.16713 [pdf, other] cs.CV
ReConPatch: Contrastive Patch Representation Learning for Industrial Anomaly Detection
Authors: Jeeho Hyun, Sangyun Kim, Giyoung Jeon, Seung Hwan Kim, Kyunghoon Bae, Byung Jun Kang
Abstract: Anomaly detection is crucial to the advanced identification of product defects such as incorrect parts, misaligned components, and damage in industrial manufacturing. Due to the rare observations and unknown types of defects, anomaly detection is considered challenging in machine learning.
To overcome this difficulty, recent approaches utilize common visual representations pre-trained on natural image datasets and distill the relevant features. However, existing approaches still suffer from a discrepancy between the pre-trained features and the target data, or require input augmentation that must be carefully designed, particularly for industrial datasets. In this paper, we introduce ReConPatch, which constructs discriminative features for anomaly detection by training a linear modulation of patch features extracted from a pre-trained model. ReConPatch employs contrastive representation learning to collect and distribute features in a way that produces a target-oriented and easily separable representation. To address the absence of labeled pairs for contrastive learning, we utilize two similarity measures between data representations, pairwise and contextual similarities, as pseudo-labels. Our method achieves state-of-the-art anomaly detection performance (99.72%) on the widely used and challenging MVTec AD dataset, as well as state-of-the-art performance (95.8%) on the BTAD dataset.
Submitted 10 January, 2024; v1 submitted 26 May, 2023; originally announced May 2023.
Comments: Accepted to WACV 2024
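The two pseudo-label similarities lend themselves to a compact sketch. The snippet below reflects our reading of the abstract; k and the equal mixing weights are assumptions rather than the paper's settings:

```python
# Our reading (k and the equal mixing are assumptions) of the two similarity
# measures used as pseudo-labels for contrastive patch representation learning.
import torch
import torch.nn.functional as F

def pairwise_similarity(z):                        # z: (n, d) patch features
    return F.cosine_similarity(z.unsqueeze(1), z.unsqueeze(0), dim=-1)

def contextual_similarity(z, k=5):
    """Two patches are contextually similar when they share nearest neighbors."""
    sim = pairwise_similarity(z)
    knn = sim.topk(k + 1, dim=-1).indices[:, 1:]           # drop self-match
    member = torch.zeros_like(sim).scatter_(1, knn, 1.0)   # (n, n) k-NN sets
    return (member @ member.T) / k                         # shared-neighbor rate

z = F.normalize(torch.randn(32, 64), dim=-1)
pseudo = 0.5 * pairwise_similarity(z) + 0.5 * contextual_similarity(z)
```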
class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2304.01576v1-abstract-short" style="display: inline;"> Accurate lung nodule segmentation is crucial for early-stage lung cancer diagnosis, as it can substantially enhance patient survival rates. Computed tomography (CT) images are widely employed for early diagnosis in lung nodule analysis. However, the heterogeneity of lung nodules, size diversity, and the complexity of the surrounding environment pose challenges for developing robust nodule segmenta&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.01576v1-abstract-full').style.display = 'inline'; document.getElementById('2304.01576v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2304.01576v1-abstract-full" style="display: none;"> Accurate lung nodule segmentation is crucial for early-stage lung cancer diagnosis, as it can substantially enhance patient survival rates. Computed tomography (CT) images are widely employed for early diagnosis in lung nodule analysis. However, the heterogeneity of lung nodules, size diversity, and the complexity of the surrounding environment pose challenges for developing robust nodule segmentation methods. In this study, we propose an efficient end-to-end framework, the multi-encoder-based self-adaptive hard attention network (MESAHA-Net), for precise lung nodule segmentation in CT scans. MESAHA-Net comprises three encoding paths, an attention block, and a decoder block, facilitating the integration of three types of inputs: CT slice patches, forward and backward maximum intensity projection (MIP) images, and region of interest (ROI) masks encompassing the nodule. By employing a novel adaptive hard attention mechanism, MESAHA-Net iteratively performs slice-by-slice 2D segmentation of lung nodules, focusing on the nodule region in each slice to generate 3D volumetric segmentation of lung nodules. The proposed framework has been comprehensively evaluated on the LIDC-IDRI dataset, the largest publicly available dataset for lung nodule segmentation. The results demonstrate that our approach is highly robust for various lung nodule types, outperforming previous state-of-the-art techniques in terms of segmentation accuracy and computational complexity, rendering it suitable for real-time clinical implementation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.01576v1-abstract-full').style.display = 'none'; document.getElementById('2304.01576v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 April, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2023. 
arXiv:2303.09917 [pdf, other] cs.CV
Vision Transformer for Action Units Detection
Authors: Tu Vu, Van Thong Huynh, Soo Hyung Kim
Abstract: Facial action unit (AU) detection is a fine-grained classification problem that involves identifying different units on the human face, as defined by the Facial Action Coding System. In this paper, we present a simple yet efficient Vision Transformer-based approach for addressing AU detection in the context of the Affective Behavior Analysis in-the-wild (ABAW) competition. We employ the Video Vision Transformer (ViViT) network to capture temporal facial change in the video. Besides, to reduce the massive size of the Vision Transformer model, we replace the ViViT feature extraction layers with a CNN backbone (RegNet). Our model outperforms the baseline model of the ABAW 2023 challenge, with a notable 14% difference in results. Furthermore, the achieved results are comparable to those of the top three teams in the previous ABAW 2022 challenge.
Submitted 20 March, 2023; v1 submitted 16 March, 2023; originally announced March 2023.
Comments: Will be updated
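The pipeline described, a CNN backbone for per-frame features, a transformer for temporal change, and a multi-label head, can be sketched as below. Layer counts, the torchvision RegNet variant, and the 12-AU output are our assumptions, not the paper's exact configuration:

```python
# Assumed configuration, not the paper's exact one: a RegNet backbone
# extracts per-frame features, a Transformer models temporal change, and a
# sigmoid head emits multi-label AU probabilities (12 AUs assumed).
import torch
import torch.nn as nn
from torchvision.models import regnet_y_400mf

class AUDetector(nn.Module):
    def __init__(self, n_aus=12, d=440):          # regnet_y_400mf is 440-d
        super().__init__()
        backbone = regnet_y_400mf()
        backbone.fc = nn.Identity()               # keep pooled features
        self.backbone = backbone
        layer = nn.TransformerEncoderLayer(d_model=d, nhead=8, batch_first=True)
        self.temporal = nn.TransformerEncoder(layer, num_layers=2)
        self.head = nn.Linear(d, n_aus)

    def forward(self, video):                     # video: (B, T, 3, H, W)
        b, t = video.shape[:2]
        feats = self.backbone(video.flatten(0, 1)).view(b, t, -1)
        return torch.sigmoid(self.head(self.temporal(feats).mean(dim=1)))

probs = AUDetector()(torch.rand(2, 8, 3, 224, 224))   # (2, 12) AU probabilities
```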
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Will be updated</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2302.05811">arXiv:2302.05811</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2302.05811">pdf</a>, <a href="https://arxiv.org/format/2302.05811">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Hierarchical control and learning of a foraging CyberOctopus </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Shih%2C+C">Chia-Hsien Shih</a>, <a href="/search/cs?searchtype=author&amp;query=Naughton%2C+N">Noel Naughton</a>, <a href="/search/cs?searchtype=author&amp;query=Halder%2C+U">Udit Halder</a>, <a href="/search/cs?searchtype=author&amp;query=Chang%2C+H">Heng-Sheng Chang</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+S+H">Seung Hyun Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Gillette%2C+R">Rhanor Gillette</a>, <a href="/search/cs?searchtype=author&amp;query=Mehta%2C+P+G">Prashant G. Mehta</a>, <a href="/search/cs?searchtype=author&amp;query=Gazzola%2C+M">Mattia Gazzola</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2302.05811v1-abstract-short" style="display: inline;"> Inspired by the unique neurophysiology of the octopus, we propose a hierarchical framework that simplifies the coordination of multiple soft arms by decomposing control into high-level decision making, low-level motor activation, and local reflexive behaviors via sensory feedback. When evaluated in the illustrative problem of a model octopus foraging for food, this hierarchical decomposition resul&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.05811v1-abstract-full').style.display = 'inline'; document.getElementById('2302.05811v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2302.05811v1-abstract-full" style="display: none;"> Inspired by the unique neurophysiology of the octopus, we propose a hierarchical framework that simplifies the coordination of multiple soft arms by decomposing control into high-level decision making, low-level motor activation, and local reflexive behaviors via sensory feedback. When evaluated in the illustrative problem of a model octopus foraging for food, this hierarchical decomposition results in significant improvements relative to end-to-end methods. Performance is achieved through a mixed-modes approach, whereby qualitatively different tasks are addressed via complementary control schemes. Here, model-free reinforcement learning is employed for high-level decision-making, while model-based energy shaping takes care of arm-level motor execution. To render the pairing computationally tenable, a novel neural-network energy shaping (NN-ES) controller is developed, achieving accurate motions with time-to-solutions 200 times faster than previous attempts. 
Our hierarchical framework is then successfully deployed in increasingly challenging foraging scenarios, including an arena littered with obstacles in 3D space, demonstrating the viability of our approach.
Submitted 11 February, 2023; originally announced February 2023.
Comments: 16 pages, 7 figures

arXiv:2302.02506 [pdf] cs.LG cs.AI
Generating Dispatching Rules for the Interrupting Swap-Allowed Blocking Job Shop Problem Using Graph Neural Network and Reinforcement Learning
Authors: Vivian W. H. Wong, Sang Hun Kim, Junyoung Park, Jinkyoo Park, Kincho H. Law
Abstract: The interrupting swap-allowed blocking job shop problem (ISBJSSP) is a complex scheduling problem that can realistically model many manufacturing planning and logistics applications by addressing both the lack of storage capacity and unforeseen production interruptions.
Subject to random disruptions due to machine malfunction or maintenance, industry production settings often adopt dispatching rules to enable adaptive, real-time re-scheduling, rather than traditional methods that require costly re-computation on the new configuration every time the problem condition changes dynamically. To generate dispatching rules for the ISBJSSP, we introduce a dynamic disjunctive graph formulation characterized by nodes and edges subjected to continuous deletions and additions. This formulation enables the training of an adaptive scheduler utilizing graph neural networks and reinforcement learning. Furthermore, a simulator is developed to simulate interruption, swapping, and blocking in the ISBJSSP setting. Employing a set of reported benchmark instances, we conduct a detailed experimental study on ISBJSSP instances with a range of machine shutdown probabilities to show that the scheduling policies generated can outperform, or are at least as competitive as, existing dispatching rules with predetermined priority. This study shows that the ISBJSSP, which requires real-time adaptive solutions, can be scheduled efficiently with the proposed method when production interruptions occur with random machine shutdowns.
Submitted 28 September, 2023; v1 submitted 5 February, 2023; originally announced February 2023.
Comments: 14 pages, 10 figures. Supplementary Material not included
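The dynamic disjunctive graph is the load-bearing abstraction here: conjunctive edges encode operation order within a job, disjunctive edges encode machine sharing, and the graph shrinks and grows as the shop state changes. A toy sketch (the three-operation instance and the networkx encoding are ours, for illustration):

```python
# Toy dynamic disjunctive graph: conjunctive edges fix operation order inside
# a job, paired disjunctive edges mark machine sharing, and completed
# operations are deleted. The 3-operation instance is invented.
import networkx as nx

g = nx.DiGraph()
g.add_nodes_from(["J1O1", "J1O2", "J2O1"])
g.add_edge("J1O1", "J1O2", kind="conjunctive")   # precedence within job 1
g.add_edge("J1O1", "J2O1", kind="disjunctive")   # same machine, order open
g.add_edge("J2O1", "J1O1", kind="disjunctive")

def complete(op):
    g.remove_node(op)       # finishing an operation updates the graph state

complete("J1O1")            # a GNN policy would re-score the remaining nodes
```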
arXiv:2212.07050 [pdf] cs.LG cs.CV eess.IV doi:10.1038/s41598-024-73695-z
Significantly improving zero-shot X-ray pathology classification via fine-tuning pre-trained image-text encoders
Authors: Jongseong Jang, Daeun Kyung, Seung Hwan Kim, Honglak Lee, Kyunghoon Bae, Edward Choi
Abstract: Deep neural networks are increasingly used in medical imaging for tasks such as pathological classification, but they face challenges due to the scarcity of high-quality, expert-labeled training data. Recent efforts have utilized pre-trained contrastive image-text models like CLIP, adapting them for medical use by fine-tuning the model with chest X-ray images and corresponding reports for zero-shot pathology classification, thus eliminating the need for pathology-specific annotations. However, most studies continue to use the same contrastive learning objectives as in the general domain, overlooking the multi-labeled nature of medical image-report pairs. In this paper, we propose a new fine-tuning strategy that includes positive-pair loss relaxation and random sentence sampling.
We aim to improve the performance of zero-shot pathology classification without relying on external knowledge. Our method can be applied to any pre-trained contrastive image-text encoder and easily transferred to out-of-domain datasets without further training, as it does not use external data. Our approach consistently improves overall zero-shot pathology classification across four chest X-ray datasets and three pre-trained models, with an average macro AUROC increase of 4.3%. Additionally, our method outperforms the state-of-the-art and marginally surpasses board-certified radiologists in zero-shot classification for the five competition pathologies in the CheXpert dataset.
Submitted 11 October, 2024; v1 submitted 14 December, 2022; originally announced December 2022.
Journal ref: Sci Rep 14, 23199 (2024)
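Positive-pair loss relaxation can be pictured as replacing CLIP's strict one-to-one InfoNCE with a multi-positive variant. The sketch below is our hedged reading, not the paper's exact objective; the boolean positive mask (e.g., several report sentences per image) is an assumed input:

```python
# Hedged sketch of positive-pair loss relaxation: CLIP's one-to-one InfoNCE
# becomes multi-positive via a boolean mask (its construction is assumed).
import torch
import torch.nn.functional as F

def relaxed_clip_loss(img, txt, pos_mask, tau=0.07):
    """img: (n, d); txt: (m, d); pos_mask: (n, m), >= 1 positive per row."""
    logits = F.normalize(img, dim=-1) @ F.normalize(txt, dim=-1).T / tau
    log_p = logits - torch.logsumexp(logits, dim=1, keepdim=True)
    return -(log_p * pos_mask).sum(1).div(pos_mask.sum(1)).mean()

img, txt = torch.randn(4, 256), torch.randn(8, 256)
mask = torch.zeros(4, 8)
mask[torch.arange(4), torch.arange(4)] = 1.0   # the paired sentence
mask[0, 4] = 1.0                               # an extra relaxed positive
loss = relaxed_clip_loss(img, txt, mask)
```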
arXiv:2211.06774 [pdf, other] cs.CV cs.CL
Large-Scale Bidirectional Training for Zero-Shot Image Captioning
Authors: Taehoon Kim, Mark Marsden, Pyunghwan Ahn, Sangyun Kim, Sihaeng Lee, Alessandra Sala, Seung Hwan Kim
Abstract: When trained on large-scale datasets, image captioning models can understand the content of images from a general domain but often fail to generate accurate, detailed captions. To improve performance, pretraining-and-finetuning has been a key strategy for image captioning. However, we find that large-scale bidirectional training between image and text enables zero-shot image captioning. In this paper, we introduce Bidirectional Image Text Training in largER Scale, BITTERS, an efficient training and inference framework for zero-shot image captioning. We also propose a new evaluation benchmark which comprises high-quality datasets and an extensive set of metrics to properly evaluate zero-shot captioning accuracy and societal bias. We additionally provide an efficient finetuning approach for keyword extraction. We show that careful selection of the large-scale training set and model architecture is the key to achieving zero-shot image captioning.
Submitted 1 October, 2023; v1 submitted 12 November, 2022; originally announced November 2022.
Comments: arXiv preprint. Work in progress
arXiv:2211.03279 [pdf, other] eess.AS cs.SD
A Context-Aware Computational Approach for Measuring Vocal Entrainment in Dyadic Conversations
Authors: Rimita Lahiri, Md Nasir, Catherine Lord, So Hyun Kim, Shrikanth Narayanan
Abstract: Vocal entrainment is a social adaptation mechanism in human interaction, knowledge of which can offer useful insights into an individual's cognitive-behavioral characteristics. We propose a context-aware approach for measuring vocal entrainment in dyadic conversations. We use conformers (a combination of convolutional network and transformer) to capture both short-term and long-term conversational context and model entrainment patterns in interactions across different domains. Specifically, we use cross-subject attention layers to learn intra- as well as inter-personal signals from dyadic conversations. We first validate the proposed method with classification experiments that distinguish between real (consistent) and fake (inconsistent/shuffled) conversations. Experimental results on interactions involving individuals with Autism Spectrum Disorder also show evidence of a statistically significant association between the introduced entrainment measure and clinical scores relevant to symptoms, including across gender and age groups.
Submitted 6 November, 2022; originally announced November 2022.
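A cross-subject attention layer, in the simplest reading, is attention whose queries come from one speaker and whose keys and values come from the interlocutor. A tiny sketch with illustrative dimensions (the conformer blocks around it are omitted):

```python
# Tiny sketch: queries from one speaker attend over the interlocutor's frames,
# mixing inter-personal with intra-personal context. Dimensions illustrative;
# the surrounding conformer blocks are omitted.
import torch
import torch.nn as nn

attn = nn.MultiheadAttention(embed_dim=64, num_heads=4, batch_first=True)
spk_a = torch.randn(1, 50, 64)    # 50 frames of speaker A features
spk_b = torch.randn(1, 60, 64)    # 60 frames of speaker B features
a_given_b, _ = attn(spk_a, spk_b, spk_b)   # inter-personal (cross-subject)
a_given_a, _ = attn(spk_a, spk_a, spk_a)   # intra-personal (self)
```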
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.03279v1-abstract-full').style.display = 'none'; document.getElementById('2211.03279v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.00003">arXiv:2211.00003</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2211.00003">pdf</a>, <a href="https://arxiv.org/format/2211.00003">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> MEDS-Net: Self-Distilled Multi-Encoders Network with Bi-Direction Maximum Intensity projections for Lung Nodule Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Usman%2C+M">Muhammad Usman</a>, <a href="/search/cs?searchtype=author&amp;query=Rehman%2C+A">Azka Rehman</a>, <a href="/search/cs?searchtype=author&amp;query=Shahid%2C+A">Abdullah Shahid</a>, <a href="/search/cs?searchtype=author&amp;query=Latif%2C+S">Siddique Latif</a>, <a href="/search/cs?searchtype=author&amp;query=Byon%2C+S+S">Shi Sub Byon</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+B+D">Byoung Dai Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+S+H">Sung Hyun Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+B+i">Byung il Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Shin%2C+Y+G">Yeong Gil Shin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.00003v2-abstract-short" style="display: inline;"> In this study, we propose a lung nodule detection scheme which fully incorporates the clinic workflow of radiologists. Particularly, we exploit Bi-Directional Maximum intensity projection (MIP) images of various thicknesses (i.e., 3, 5 and 10mm) along with a 3D patch of CT scan, consisting of 10 adjacent slices to feed into self-distillation-based Multi-Encoders Network (MEDS-Net). The proposed ar&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.00003v2-abstract-full').style.display = 'inline'; document.getElementById('2211.00003v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.00003v2-abstract-full" style="display: none;"> In this study, we propose a lung nodule detection scheme which fully incorporates the clinic workflow of radiologists. Particularly, we exploit Bi-Directional Maximum intensity projection (MIP) images of various thicknesses (i.e., 3, 5 and 10mm) along with a 3D patch of CT scan, consisting of 10 adjacent slices to feed into self-distillation-based Multi-Encoders Network (MEDS-Net). 
The proposed architecture first condenses the 3D patch input to three channels using a dense block, whose dense units effectively examine nodule presence in the 2D axial slices. This condensed information, along with the forward and backward MIP images, is fed to three different encoders to learn the most meaningful representation, which is forwarded to the decoder block at various levels. At the decoder block, we employ a self-distillation mechanism by attaching a distillation block that contains five lung nodule detectors. This helps to expedite convergence and improves the learning ability of the proposed architecture. Finally, the proposed scheme reduces false positives by complementing the main detector with the auxiliary detectors. The proposed scheme has been rigorously evaluated on 888 scans of the LUNA16 dataset and obtained a CPM score of 93.6%. The results demonstrate that incorporating bi-directional MIP images enables MEDS-Net to effectively distinguish nodules from their surroundings, helping to achieve sensitivities of 91.5% and 92.8% at false positive rates of 0.25 and 0.5 per scan, respectively.
Submitted 26 December, 2022; v1 submitted 30 October, 2022; originally announced November 2022.
id="2210.03739v4-abstract-short" style="display: inline;"> Accurate segmentation of mandibular canals in lower jaws is important in dental implantology. Medical experts determine the implant position and dimensions manually from 3D CT images to avoid damaging the mandibular nerve inside the canal. In this paper, we propose a novel dual-stage deep learning-based scheme for the automatic segmentation of the mandibular canal. Particularly, we first enhance t&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.03739v4-abstract-full').style.display = 'inline'; document.getElementById('2210.03739v4-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2210.03739v4-abstract-full" style="display: none;"> Accurate segmentation of mandibular canals in lower jaws is important in dental implantology. Medical experts determine the implant position and dimensions manually from 3D CT images to avoid damaging the mandibular nerve inside the canal. In this paper, we propose a novel dual-stage deep learning-based scheme for the automatic segmentation of the mandibular canal. Particularly, we first enhance the CBCT scans by employing the novel histogram-based dynamic windowing scheme, which improves the visibility of mandibular canals. After enhancement, we design 3D deeply supervised attention U-Net architecture for localizing the volumes of interest (VOIs), which contain the mandibular canals (i.e., left and right canals). Finally, we employed the multi-scale input residual U-Net architecture (MS-R-UNet) to segment the mandibular canals using VOIs accurately. The proposed method has been rigorously evaluated on 500 scans. The results demonstrate that our technique outperforms the current state-of-the-art segmentation performance and robustness methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.03739v4-abstract-full').style.display = 'none'; document.getElementById('2210.03739v4-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">7 Pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2209.13430">arXiv:2209.13430</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2209.13430">pdf</a>, <a href="https://arxiv.org/format/2209.13430">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> UniCLIP: Unified Framework for Contrastive Language-Image Pre-training </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lee%2C+J">Janghyeon Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+J">Jongsuk Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Shon%2C+H">Hyounguk Shon</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+B">Bumsoo Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+S+H">Seung Hwan Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+H">Honglak Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+J">Junmo Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2209.13430v2-abstract-short" style="display: inline;"> Pre-training vision-language models with contrastive objectives has shown promising results that are both scalable to large uncurated datasets and transferable to many downstream applications. Some following works have targeted to improve data efficiency by adding self-supervision terms, but inter-domain (image-text) contrastive loss and intra-domain (image-image) contrastive loss are defined on i&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.13430v2-abstract-full').style.display = 'inline'; document.getElementById('2209.13430v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2209.13430v2-abstract-full" style="display: none;"> Pre-training vision-language models with contrastive objectives has shown promising results that are both scalable to large uncurated datasets and transferable to many downstream applications. Some following works have targeted to improve data efficiency by adding self-supervision terms, but inter-domain (image-text) contrastive loss and intra-domain (image-image) contrastive loss are defined on individual spaces in those works, so many feasible combinations of supervision are overlooked. To overcome this issue, we propose UniCLIP, a Unified framework for Contrastive Language-Image Pre-training. UniCLIP integrates the contrastive loss of both inter-domain pairs and intra-domain pairs into a single universal space. The discrepancies that occur when integrating contrastive loss between different domains are resolved by the three key components of UniCLIP: (1) augmentation-aware feature embedding, (2) MP-NCE loss, and (3) domain dependent similarity measure. UniCLIP outperforms previous vision-language pre-training methods on various single- and multi-modality downstream tasks. 
In our experiments, we show that each component of UniCLIP contributes well to the final performance.
Submitted 31 October, 2022; v1 submitted 27 September, 2022; originally announced September 2022.
Comments: Neural Information Processing Systems (NeurIPS) 2022
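The "single universal space" idea can be illustrated with a plain multi-positive NCE over concatenated image-augmentation and text embeddings; note this stands in for, and is not, the paper's MP-NCE loss or its augmentation-aware embedding:

```python
# Illustration of one universal contrastive space: two image augmentations
# and the caption enter a single multi-positive NCE. This stands in for,
# and is not, the paper's MP-NCE or augmentation-aware embedding.
import torch
import torch.nn.functional as F

def unified_contrastive(views, tau=0.07):
    """views: list of (n, d) embeddings; index i is positive across views."""
    z = F.normalize(torch.cat(views), dim=-1)        # one shared space
    n = views[0].shape[0]
    labels = torch.arange(n).repeat(len(views))
    logits = z @ z.T / tau
    logits.fill_diagonal_(float("-inf"))             # drop self-pairs
    pos = labels.unsqueeze(0) == labels.unsqueeze(1)
    pos.fill_diagonal_(False)
    log_p = logits - torch.logsumexp(logits, dim=1, keepdim=True)
    return -log_p[pos].mean()

img_a, img_b, txt = (torch.randn(4, 128) for _ in range(3))
loss = unified_contrastive([img_a, img_b, txt])
```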
We show that this allows us to design a linear model in which a quadratic parameter regularization method is the optimal continual learning policy, while at the same time enjoying the high performance of neural networks. We also show that the proposed algorithm enables parameter regularization methods to be applied to class-incremental problems. Additionally, we provide a theoretical reason why the existing parameter-space regularization algorithms such as EWC underperform on neural networks trained with cross-entropy loss. We show that the proposed method can prevent forgetting while achieving high continual fine-tuning performance on image classification tasks. To show that our method can be applied to general continual learning settings, we evaluate our method in data-incremental, task-incremental, and class-incremental learning problems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.08112v1-abstract-full').style.display = 'none'; document.getElementById('2208.08112v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">European Conference on Computer Vision (ECCV) 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2203.07682">arXiv:2203.07682</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2203.07682">pdf</a>, <a href="https://arxiv.org/format/2203.07682">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Enriched CNN-Transformer Feature Aggregation Networks for Super-Resolution </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yoo%2C+J">Jinsu Yoo</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+T">Taehoon Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+S">Sihaeng Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+S+H">Seung Hwan Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+H">Honglak Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+T+H">Tae Hyun Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2203.07682v3-abstract-short" style="display: inline;"> Recent transformer-based super-resolution (SR) methods have achieved promising results against conventional CNN-based methods. However, these approaches suffer from an inherent shortsightedness created by utilizing only standard self-attention-based reasoning.
In this paper, we introduce an effective hybrid SR network to aggregate enriched features, including local features from CNNs and long-ra&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.07682v3-abstract-full').style.display = 'inline'; document.getElementById('2203.07682v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2203.07682v3-abstract-full" style="display: none;"> Recent transformer-based super-resolution (SR) methods have achieved promising results against conventional CNN-based methods. However, these approaches suffer from an inherent shortsightedness created by utilizing only standard self-attention-based reasoning. In this paper, we introduce an effective hybrid SR network to aggregate enriched features, including local features from CNNs and long-range multi-scale dependencies captured by transformers. Specifically, our network comprises transformer and convolutional branches, which synergistically complement each representation during the restoration procedure. Furthermore, we propose a cross-scale token attention module, allowing the transformer branch to exploit the informative relationships among tokens across different scales efficiently. Our proposed method achieves state-of-the-art SR results on numerous benchmark datasets. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.07682v3-abstract-full').style.display = 'none'; document.getElementById('2203.07682v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 March, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">WACV 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2202.07741">arXiv:2202.07741</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2202.07741">pdf</a>, <a href="https://arxiv.org/format/2202.07741">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> </div> </div> <p class="title is-5 mathjax"> Disentangling Successor Features for Coordination in Multi-agent Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Kim%2C+S+H">Seung Hyun Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Van+Stralen%2C+N">Neale Van Stralen</a>, <a href="/search/cs?searchtype=author&amp;query=Chowdhary%2C+G">Girish Chowdhary</a>, <a href="/search/cs?searchtype=author&amp;query=Tran%2C+H+T">Huy T. Tran</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2202.07741v1-abstract-short" style="display: inline;"> Multi-agent reinforcement learning (MARL) is a promising framework for solving complex tasks with many agents.
However, a key challenge in MARL is defining private utility functions that ensure coordination when training decentralized agents. This challenge is especially prevalent in unstructured tasks with sparse rewards and many agents. We show that successor features can help address this chall&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.07741v1-abstract-full').style.display = 'inline'; document.getElementById('2202.07741v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2202.07741v1-abstract-full" style="display: none;"> Multi-agent reinforcement learning (MARL) is a promising framework for solving complex tasks with many agents. However, a key challenge in MARL is defining private utility functions that ensure coordination when training decentralized agents. This challenge is especially prevalent in unstructured tasks with sparse rewards and many agents. We show that successor features can help address this challenge by disentangling an individual agent&#39;s impact on the global value function from that of all other agents. We use this disentanglement to compactly represent private utilities that support stable training of decentralized agents in unstructured tasks. We implement our approach using a centralized training, decentralized execution architecture and test it in a variety of multi-agent environments. Our results show improved performance and training time relative to existing methods and suggest that disentanglement of successor features offers a promising approach to coordination in MARL. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.07741v1-abstract-full').style.display = 'none'; document.getElementById('2202.07741v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 February, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2022. 
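<p class="is-size-7">As a concrete illustration of the successor-feature identity this abstract builds on, the minimal numpy sketch below computes V = Psi @ w for a small tabular MDP and isolates one agent's contribution by zeroing its reward weights; the transition matrix, features, and agent-to-feature assignment are hypothetical stand-ins, not the paper's implementation.</p> <pre><code>
import numpy as np

# Tabular MDP: fixed-policy transition matrix P and discount gamma.
n_states, gamma = 4, 0.9
rng = np.random.default_rng(0)
P = rng.dirichlet(np.ones(n_states), size=n_states)  # P[s, s'] under the policy

# State features Phi and a linear reward model r(s) = Phi[s] @ w.
Phi = np.eye(n_states)                 # one-hot features for clarity
w = np.array([0.0, 1.0, 0.0, 2.0])

# Successor features: Psi = sum_t gamma^t P^t Phi = (I - gamma P)^{-1} Phi.
Psi = np.linalg.solve(np.eye(n_states) - gamma * P, Phi)

# The value function factorizes as V = Psi @ w, so one agent's impact on the
# global value can be disentangled by zeroing the reward weights it "owns".
V = Psi @ w
w_without_agent_i = w.copy()
w_without_agent_i[3] = 0.0             # hypothetical: feature 3 belongs to agent i
V_others = Psi @ w_without_agent_i
print(V - V_others)                    # agent i's disentangled contribution
</code></pre>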
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">The paper is accepted in AAMAS 2022 (International Conference on Autonomous Agents and Multiagent Systems)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2112.00343">arXiv:2112.00343</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2112.00343">pdf</a>, <a href="https://arxiv.org/format/2112.00343">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Camera Motion Agnostic 3D Human Pose Estimation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Kim%2C+S+H">Seong Hyun Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Jeong%2C+S">Sunwon Jeong</a>, <a href="/search/cs?searchtype=author&amp;query=Park%2C+S">Sungbum Park</a>, <a href="/search/cs?searchtype=author&amp;query=Chang%2C+J+Y">Ju Yong Chang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2112.00343v1-abstract-short" style="display: inline;"> Although the performance of 3D human pose and shape estimation methods has improved significantly in recent years, existing approaches typically generate 3D poses defined in camera or human-centered coordinate system. This makes it difficult to estimate a person&#39;s pure pose and motion in world coordinate system for a video captured using a moving camera. To address this issue, this paper presents&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.00343v1-abstract-full').style.display = 'inline'; document.getElementById('2112.00343v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2112.00343v1-abstract-full" style="display: none;"> Although the performance of 3D human pose and shape estimation methods has improved significantly in recent years, existing approaches typically generate 3D poses defined in camera or human-centered coordinate system. This makes it difficult to estimate a person&#39;s pure pose and motion in world coordinate system for a video captured using a moving camera. To address this issue, this paper presents a camera motion agnostic approach for predicting 3D human pose and mesh defined in the world coordinate system. The core idea of the proposed approach is to estimate the difference between two adjacent global poses (i.e., global motion) that is invariant to selecting the coordinate system, instead of the global pose coupled to the camera motion. To this end, we propose a network based on bidirectional gated recurrent units (GRUs) that predicts the global motion sequence from the local pose sequence consisting of relative rotations of joints called global motion regressor (GMR). We use 3DPW and synthetic datasets, which are constructed in a moving-camera environment, for evaluation. We conduct extensive experiments and prove the effectiveness of the proposed method empirically. 
Code and datasets are available at https://github.com/seonghyunkim1212/GMR <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.00343v1-abstract-full').style.display = 'none'; document.getElementById('2112.00343v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 December, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2111.11133">arXiv:2111.11133</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2111.11133">pdf</a>, <a href="https://arxiv.org/format/2111.11133">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> L-Verse: Bidirectional Generation Between Image and Text </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Kim%2C+T">Taehoon Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+G">Gwangmo Song</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+S">Sihaeng Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+S">Sangyun Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Seo%2C+Y">Yewon Seo</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+S">Soonyoung Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+S+H">Seung Hwan Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+H">Honglak Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Bae%2C+K">Kyunghoon Bae</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2111.11133v11-abstract-short" style="display: inline;"> Far beyond learning long-range interactions of natural language, transformers are becoming the de facto standard for many vision tasks with their power and scalability. Especially with cross-modal tasks between image and text, vector quantized variational autoencoders (VQ-VAEs) are widely used to turn a raw RGB image into a sequence of feature vectors. To better leverage the correlation between im&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2111.11133v11-abstract-full').style.display = 'inline'; document.getElementById('2111.11133v11-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2111.11133v11-abstract-full" style="display: none;"> Far beyond learning long-range interactions of natural language, transformers are becoming the de facto standard for many vision tasks with their power and scalability. Especially with cross-modal tasks between image and text, vector quantized variational autoencoders (VQ-VAEs) are widely used to turn a raw RGB image into a sequence of feature vectors.
To better leverage the correlation between image and text, we propose L-Verse, a novel architecture consisting of feature-augmented variational autoencoder (AugVAE) and bidirectional auto-regressive transformer (BiART) for image-to-text and text-to-image generation. Our AugVAE shows state-of-the-art reconstruction performance on the ImageNet1K validation set, along with robustness to unseen images in the wild. Unlike other models, BiART can distinguish between image (or text) as a conditional reference and a generation target. L-Verse can be directly used for image-to-text or text-to-image generation without any finetuning or extra object detection framework. In quantitative and qualitative experiments, L-Verse shows impressive results against previous methods in both image-to-text and text-to-image generation on MS-COCO Captions. We furthermore assess the scalability of the L-Verse architecture on Conceptual Captions and present initial results for bidirectional vision-language representation learning on the general domain. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2111.11133v11-abstract-full').style.display = 'none'; document.getElementById('2111.11133v11-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 April, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 November, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to CVPR 2022 as Oral Presentation (18 pages, 14 figures, 4 tables)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2110.14874">arXiv:2110.14874</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2110.14874">pdf</a>, <a href="https://arxiv.org/format/2110.14874">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Sayer: Using Implicit Feedback to Optimize System Policies </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=L%C3%A9cuyer%2C+M">Mathias Lécuyer</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+S+H">Sang Hoon Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Nanavati%2C+M">Mihir Nanavati</a>, <a href="/search/cs?searchtype=author&amp;query=Jiang%2C+J">Junchen Jiang</a>, <a href="/search/cs?searchtype=author&amp;query=Sen%2C+S">Siddhartha Sen</a>, <a href="/search/cs?searchtype=author&amp;query=Sharma%2C+A">Amit Sharma</a>, <a href="/search/cs?searchtype=author&amp;query=Slivkins%2C+A">Aleksandrs Slivkins</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2110.14874v1-abstract-short" style="display: inline;"> We observe that many system policies that make threshold decisions involving a resource (e.g., time, memory, cores) naturally reveal additional, or implicit feedback.
For example, if a system waits X min for an event to occur, then it automatically learns what would have happened if it waited &lt;X min, because time has a cumulative property. This feedback tells us about alternative decisions, and ca&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.14874v1-abstract-full').style.display = 'inline'; document.getElementById('2110.14874v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2110.14874v1-abstract-full" style="display: none;"> We observe that many system policies that make threshold decisions involving a resource (e.g., time, memory, cores) naturally reveal additional, or implicit feedback. For example, if a system waits X min for an event to occur, then it automatically learns what would have happened if it waited &lt;X min, because time has a cumulative property. This feedback tells us about alternative decisions, and can be used to improve the system policy. However, leveraging implicit feedback is difficult because it tends to be one-sided or incomplete, and may depend on the outcome of the event. As a result, existing practices for using feedback, such as simply incorporating it into a data-driven model, suffer from bias. We develop a methodology, called Sayer, that leverages implicit feedback to evaluate and train new system policies. Sayer builds on two ideas from reinforcement learning -- randomized exploration and unbiased counterfactual estimators -- to leverage data collected by an existing policy to estimate the performance of new candidate policies, without actually deploying those policies. Sayer uses implicit exploration and implicit data augmentation to generate implicit feedback in an unbiased form, which is then used by an implicit counterfactual estimator to evaluate and train new policies. The key idea underlying these techniques is to assign implicit probabilities to decisions that are not actually taken but whose feedback can be inferred; these probabilities are carefully calculated to ensure statistical unbiasedness. We apply Sayer to two production scenarios in Azure, and show that it can evaluate arbitrary policies accurately, and train new policies that outperform the production policies. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.14874v1-abstract-full').style.display = 'none'; document.getElementById('2110.14874v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 October, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2021. 
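<p class="is-size-7">The following toy sketch illustrates the counterfactual-estimation idea in this abstract: a randomized logged wait threshold reveals implicit feedback for all shorter waits, and inverse-propensity weighting keeps the estimate unbiased. All probabilities, the reward model, and the event-time distribution are hypothetical, not Sayer's actual implementation.</p> <pre><code>
import numpy as np

rng = np.random.default_rng(1)
thresholds = np.array([1, 2, 3, 4, 5])   # candidate wait times in minutes
p_log = np.full(5, 0.2)                  # logging policy randomizes over them

def reward(wait, event_time):
    # Hypothetical utility: credit if the event arrived within the wait,
    # minus a small cost proportional to the time actually spent waiting.
    hit = 1.0 if wait >= event_time else 0.0
    return hit - 0.1 * min(wait, event_time)

# Logged data collected under the randomized logging policy.
logs = [(rng.choice(thresholds, p=p_log), rng.exponential(2.0))
        for _ in range(20000)]

# Implicit feedback: a log that waited X also reveals the outcome of any
# shorter wait t, so t's feedback is revealed with probability P(X >= t).
def ips_value(t):
    reveal_prob = p_log[thresholds >= t].sum()
    total = sum(reward(t, T) for (X, T) in logs if X >= t)
    return total / (reveal_prob * len(logs))  # unbiased IPS estimate

for t in thresholds:
    print(t, round(ips_value(t), 3))          # offline value of "always wait t"
</code></pre>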
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2109.08372">arXiv:2109.08372</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2109.08372">pdf</a>, <a href="https://arxiv.org/format/2109.08372">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> A physics-informed, vision-based method to reconstruct all deformation modes in slender bodies </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Kim%2C+S+H">Seung Hyun Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Chang%2C+H">Heng-Sheng Chang</a>, <a href="/search/cs?searchtype=author&amp;query=Shih%2C+C">Chia-Hsien Shih</a>, <a href="/search/cs?searchtype=author&amp;query=Uppalapati%2C+N+K">Naveen Kumar Uppalapati</a>, <a href="/search/cs?searchtype=author&amp;query=Halder%2C+U">Udit Halder</a>, <a href="/search/cs?searchtype=author&amp;query=Krishnan%2C+G">Girish Krishnan</a>, <a href="/search/cs?searchtype=author&amp;query=Mehta%2C+P+G">Prashant G. Mehta</a>, <a href="/search/cs?searchtype=author&amp;query=Gazzola%2C+M">Mattia Gazzola</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2109.08372v1-abstract-short" style="display: inline;"> This paper is concerned with the problem of estimating (interpolating and smoothing) the shape (pose and the six modes of deformation) of a slender flexible body from multiple camera measurements. This problem is important in both biology, where slender, soft, and elastic structures are ubiquitously encountered across species, and in engineering, particularly in the area of soft robotics. The prop&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.08372v1-abstract-full').style.display = 'inline'; document.getElementById('2109.08372v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2109.08372v1-abstract-full" style="display: none;"> This paper is concerned with the problem of estimating (interpolating and smoothing) the shape (pose and the six modes of deformation) of a slender flexible body from multiple camera measurements. This problem is important in both biology, where slender, soft, and elastic structures are ubiquitously encountered across species, and in engineering, particularly in the area of soft robotics. The proposed mathematical formulation for shape estimation is physics-informed, based on the use of the special Cosserat rod theory whose equations encode slender body mechanics in the presence of bending, shearing, twisting and stretching. The approach is used to derive numerical algorithms which are experimentally demonstrated for fiber reinforced and cable-driven soft robot arms. These experimental demonstrations show that the methodology is accurate (&lt;5 mm error, three times less than the arm diameter) and robust to noise and uncertainties. 
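<p class="is-size-7">For readers unfamiliar with the special Cosserat rod model named above, the following bare-bones sketch shows the kinematics the formulation rests on: given the six strain modes along arclength (two bends plus twist in kappa, two shears plus stretch in sigma), the rod's pose is recovered by integrating dR/ds = R hat(kappa) and dr/ds = R sigma. The discretization and strain values are illustrative, not the paper's estimation algorithm.</p> <pre><code>
import numpy as np

def hat(v):
    # Skew-symmetric matrix so that hat(v) @ u equals cross(v, u).
    return np.array([[0.0, -v[2], v[1]],
                     [v[2], 0.0, -v[0]],
                     [-v[1], v[0], 0.0]])

# Six deformation modes per segment: kappa = (bend, bend, twist) in 1/m,
# sigma = (shear, shear, stretch); a straight rod has kappa=0, sigma=(0,0,1).
n, L = 100, 0.2                            # segments and rod length (m)
ds = L / n
kappa = np.tile([5.0, 0.0, 2.0], (n, 1))   # constant bend plus twist
sigma = np.tile([0.0, 0.0, 1.05], (n, 1))  # 5% stretch along the local axis

R = np.eye(3)                              # base-frame orientation
r = np.zeros(3)                            # base position
centerline = [r.copy()]
for i in range(n):
    # Forward-Euler integration of dr/ds = R @ sigma, dR/ds = R @ hat(kappa).
    r = r + ds * (R @ sigma[i])
    R = R @ (np.eye(3) + ds * hat(kappa[i]))   # first-order rotation update
    centerline.append(r.copy())
print(np.array(centerline)[-1])            # reconstructed tip position
</code></pre>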
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.08372v1-abstract-full').style.display = 'none'; document.getElementById('2109.08372v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 September, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This work has been submitted to the IEEE RA-L with ICRA 2022 for possible publication. Copyright may be transferred without notice. For associated data and code, see https://github.com/GazzolaLab/BR2-vision-based-smoothing</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2106.01086">arXiv:2106.01086</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2106.01086">pdf</a>, <a href="https://arxiv.org/format/2106.01086">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1080/00207543.2020.1870013">10.1080/00207543.2020.1870013 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Learning to schedule job-shop problems: Representation and policy learning using graph neural network and reinforcement learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Park%2C+J">Junyoung Park</a>, <a href="/search/cs?searchtype=author&amp;query=Chun%2C+J">Jaehyeong Chun</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+S+H">Sang Hun Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+Y">Youngkook Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Park%2C+J">Jinkyoo Park</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2106.01086v1-abstract-short" style="display: inline;"> We propose a framework to learn to schedule a job-shop problem (JSSP) using a graph neural network (GNN) and reinforcement learning (RL). We formulate the scheduling process of JSSP as a sequential decision-making problem with graph representation of the state to consider the structure of JSSP. In solving the formulated problem, the proposed framework employs a GNN to learn that node features that&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.01086v1-abstract-full').style.display = 'inline'; document.getElementById('2106.01086v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2106.01086v1-abstract-full" style="display: none;"> We propose a framework to learn to schedule a job-shop problem (JSSP) using a graph neural network (GNN) and reinforcement learning (RL). 
We formulate the scheduling process of JSSP as a sequential decision-making problem with a graph representation of the state to consider the structure of JSSP. In solving the formulated problem, the proposed framework employs a GNN to learn node features that embed the spatial structure of the JSSP represented as a graph (representation learning) and to derive the optimal scheduling policy that maps the embedded node features to the best scheduling action (policy learning). We employ a Proximal Policy Optimization (PPO)-based RL strategy to train these two modules in an end-to-end fashion. We empirically demonstrate that the GNN scheduler, due to its superb generalization capability, outperforms practically favored dispatching rules and RL-based schedulers on various benchmark JSSPs. We also confirm that the proposed framework learns a transferable scheduling policy that can be employed to schedule a completely new JSSP (in terms of size and parameters) without further training. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.01086v1-abstract-full').style.display = 'none'; document.getElementById('2106.01086v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages, 8 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> International Journal of Production Research, Volume 59, 2021 - Issue 11, Pages 3360-3377 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2008.02043">arXiv:2008.02043</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2008.02043">pdf</a>, <a href="https://arxiv.org/format/2008.02043">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Learning Boost by Exploiting the Auxiliary Task in Multi-task Domain </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yim%2C+J">Jonghwa Yim</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+S+H">Sang Hwan Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2008.02043v1-abstract-short" style="display: inline;"> Learning two tasks in a single shared function has some benefits. Firstly, by acquiring information from the second task, the shared function leverages useful information that could have been neglected or underestimated in the first task. Secondly, it helps to generalize the function that can be learned using generally applicable information for both tasks.
To fully enjoy these benefits, Multi-task&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2008.02043v1-abstract-full').style.display = 'inline'; document.getElementById('2008.02043v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2008.02043v1-abstract-full" style="display: none;"> Learning two tasks in a single shared function has some benefits. Firstly, by acquiring information from the second task, the shared function leverages useful information that could have been neglected or underestimated in the first task. Secondly, it helps to generalize the function that can be learned using generally applicable information for both tasks. To fully enjoy these benefits, Multi-task Learning (MTL) has long been researched in various domains such as computer vision, language understanding, and speech synthesis. While MTL benefits from the positive transfer of information from multiple tasks, in a real environment, tasks inevitably come into conflict during the learning phase, which is called negative transfer. Negative transfer hampers the function from achieving optimality and degrades performance. To solve the problem of task conflict, previous works suggested only partial solutions that are ad hoc rather than fundamental. A common approach is using a weighted sum of losses. The weights are adjusted to induce positive transfer. Paradoxically, this kind of solution acknowledges the problem of negative transfer and cannot remove it unless the weight of the task is set to zero. Therefore, these previous methods had limited success. In this paper, we introduce a novel approach that can drive positive transfer and suppress negative transfer by leveraging class-wise weights in the learning process. The weights act as an arbitrator over the fundamental units of information, determining whether each is positive or negative for the main task. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2008.02043v1-abstract-full').style.display = 'none'; document.getElementById('2008.02043v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 August, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2020.
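<p class="is-size-7">A schematic sketch of the class-wise weighting idea as described above: the auxiliary task's per-class losses are scaled by weights that arbitrate between positive and negative transfer. The composition of the loss follows the abstract; the weight-update rule shown (gradient-alignment sign) is our own illustrative stand-in, not the paper's exact rule.</p> <pre><code>
import numpy as np

def multitask_loss(main_loss, aux_loss_per_class, class_weights):
    # Main-task loss plus a class-wise weighted sum of auxiliary losses: a
    # weight near 0 mutes a class judged to transfer negatively, a weight
    # near 1 lets a positively transferring class contribute fully.
    return main_loss + np.dot(class_weights, aux_loss_per_class)

# Hypothetical state after some training, with 4 auxiliary classes.
aux_losses = np.array([0.8, 0.3, 1.2, 0.5])
weights = np.array([1.0, 1.0, 0.1, 0.9])   # class 2 judged a negative transfer
print(multitask_loss(0.7, aux_losses, weights))

# One illustrative arbitration rule: nudge each weight toward 1 when the
# class's auxiliary gradient aligns with the main-task gradient (positive
# cosine similarity), and toward 0 otherwise.
def update_weights(weights, cosines, lr=0.05):
    return np.clip(weights + lr * np.sign(cosines), 0.0, 1.0)

print(update_weights(weights, cosines=np.array([0.4, 0.2, -0.6, 0.1])))
</code></pre>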
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2007.09635">arXiv:2007.09635</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2007.09635">pdf</a>, <a href="https://arxiv.org/format/2007.09635">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> Meta-learning with Latent Space Clustering in Generative Adversarial Network for Speaker Diarization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Pal%2C+M">Monisankha Pal</a>, <a href="/search/cs?searchtype=author&amp;query=Kumar%2C+M">Manoj Kumar</a>, <a href="/search/cs?searchtype=author&amp;query=Peri%2C+R">Raghuveer Peri</a>, <a href="/search/cs?searchtype=author&amp;query=Park%2C+T+J">Tae Jin Park</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+S+H">So Hyun Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Lord%2C+C">Catherine Lord</a>, <a href="/search/cs?searchtype=author&amp;query=Bishop%2C+S">Somer Bishop</a>, <a href="/search/cs?searchtype=author&amp;query=Narayanan%2C+S">Shrikanth Narayanan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2007.09635v1-abstract-short" style="display: inline;"> The performance of most speaker diarization systems with x-vector embeddings is vulnerable to noisy environments and lacks domain robustness. Earlier work on speaker diarization using generative adversarial network (GAN) with an encoder network (ClusterGAN) to project input x-vectors into a latent space has shown promising performance on meeting data. In this paper, we extend the ClusterGAN n&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2007.09635v1-abstract-full').style.display = 'inline'; document.getElementById('2007.09635v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2007.09635v1-abstract-full" style="display: none;"> The performance of most speaker diarization systems with x-vector embeddings is vulnerable to noisy environments and lacks domain robustness. Earlier work on speaker diarization using generative adversarial network (GAN) with an encoder network (ClusterGAN) to project input x-vectors into a latent space has shown promising performance on meeting data. In this paper, we extend the ClusterGAN network to improve diarization robustness and enable rapid generalization across various challenging domains. To this end, we fetch the pre-trained encoder from the ClusterGAN and fine-tune it by using prototypical loss (meta-ClusterGAN or MCGAN) under the meta-learning paradigm. Experiments are conducted on CALLHOME telephonic conversations, AMI meeting data, DIHARD II (dev set) which includes a challenging multi-domain corpus, and two child-clinician interaction corpora (ADOS, BOSCC) related to the autism spectrum disorder domain. Extensive analyses of the experimental data are performed to investigate the effectiveness of the proposed ClusterGAN and MCGAN embeddings over x-vectors.
The results show that the proposed embeddings with normalized maximum eigengap spectral clustering (NME-SC) back-end consistently outperform the state-of-the-art Kaldi x-vector diarization system. Finally, we employ embedding fusion with x-vectors to provide further improvement in diarization performance. We achieve a relative diarization error rate (DER) improvement of 6.67% to 53.93% on the aforementioned datasets using the proposed fused embeddings over x-vectors. In addition, the MCGAN embeddings outperform x-vectors and ClusterGAN at estimating the number of speakers and diarizing short speech segments in telephonic data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2007.09635v1-abstract-full').style.display = 'none'; document.getElementById('2007.09635v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 July, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Submitted to IEEE/ACM Transactions on Audio, Speech, and Language Processing</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2004.01084">arXiv:2004.01084</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2004.01084">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1088/1748-9326/ab8847">10.1088/1748-9326/ab8847 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Patterns of population displacement during mega-fires in California detected using Facebook Disaster Maps </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Jia%2C+S">Shenyue Jia</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+S+H">Seung Hee Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Nghiem%2C+S+V">Son V. Nghiem</a>, <a href="/search/cs?searchtype=author&amp;query=Doherty%2C+P">Paul Doherty</a>, <a href="/search/cs?searchtype=author&amp;query=Kafatos%2C+M">Menas Kafatos</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2004.01084v1-abstract-short" style="display: inline;"> Facebook Disaster Maps (FBDM) is the first platform providing analysis-ready population change products derived from crowdsourced data targeting disaster relief practices.
We evaluate the representativeness of FBDM data using the Mann-Kendall test and emerging hot and cold spots in an anomaly analysis to reveal the trend, magnitude, and agglomeration of population displacement during the Mendocin&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2004.01084v1-abstract-full').style.display = 'inline'; document.getElementById('2004.01084v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2004.01084v1-abstract-full" style="display: none;"> Facebook Disaster Maps (FBDM) is the first platform providing analysis-ready population change products derived from crowdsourced data targeting disaster relief practices. We evaluate the representativeness of FBDM data using the Mann-Kendall test and emerging hot and cold spots in an anomaly analysis to reveal the trend, magnitude, and agglomeration of population displacement during the Mendocino Complex and Woolsey fires in California, USA. Our results show that the distribution of FBDM pre-crisis users fits well with the total population from different sources. Due to usage habits, the elderly population is underrepresented in FBDM data. During the two mega-fires in California, FBDM data effectively captured the temporal change of population arising from the placing and lifting of evacuation orders. Coupled with monotonic trends, the fall and rise of cold and hot spots of population revealed the areas with the greatest population drop and potential places to house the displaced residents. A comparison between the Mendocino Complex and Woolsey fires indicates that a densely populated region can be evacuated faster than a sparsely populated one, possibly due to better access to transportation. In sparsely populated fire-prone areas, resources should be prioritized to move people to shelters as the displaced residents do not have many alternative options, while their counterparts in densely populated areas can utilize their social connections to seek a temporary stay at nearby locations during an evacuation. Integrated with an assessment of underrepresented communities, FBDM data and its derivatives can provide much-needed, near real-time information on population displacement for crisis response and disaster relief. As applications and data generation mature, FBDM will harness crowdsourced data and aid first responder decision-making. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2004.01084v1-abstract-full').style.display = 'none'; document.getElementById('2004.01084v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 April, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2020.
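<p class="is-size-7">For reference, a compact implementation of the Mann-Kendall trend statistic named in this abstract (without tie correction), applied to a hypothetical daily population-count series for one map cell; the data are made up for illustration.</p> <pre><code>
import numpy as np

def mann_kendall(x):
    # S sums sign(x[j] - x[i]) over every ordered pair with j after i;
    # positive S indicates an increasing monotonic trend, negative S a
    # decreasing one.
    x = np.asarray(x, dtype=float)
    n = len(x)
    s = sum(np.sign(x[j] - x[i]) for i in range(n - 1) for j in range(i + 1, n))
    var_s = n * (n - 1) * (2 * n + 5) / 18.0        # variance assuming no ties
    z = (s - np.sign(s)) / np.sqrt(var_s)           # continuity correction
    return s, z

# Hypothetical pre/post-evacuation population counts for one map cell.
series = [980, 975, 990, 940, 870, 760, 650, 640, 700, 820]
s, z = mann_kendall(series)
print(s, round(z, 2))   # |z| above 1.96 suggests a significant trend at 5%
</code></pre>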
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages with supplemental information</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1912.13335">arXiv:1912.13335</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1912.13335">pdf</a>, <a href="https://arxiv.org/format/1912.13335">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Volumetric Lung Nodule Segmentation using Adaptive ROI with Multi-View Residual Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Usman%2C+M">Muhammad Usman</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+B">Byoung-Dai Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Byon%2C+S+S">Shi Sub Byon</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+S+H">Sung Hyun Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Byung-ilLee"> Byung-ilLee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1912.13335v2-abstract-short" style="display: inline;"> Accurate quantification of pulmonary nodules can greatly assist the early diagnosis of lung cancer, which can enhance patient survival possibilities. A number of nodule segmentation techniques have been proposed, however, all of the existing techniques rely on radiologist 3-D volume of interest (VOI) input or use the constant region of interest (ROI) and only investigate the presence of nodule vox&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1912.13335v2-abstract-full').style.display = 'inline'; document.getElementById('1912.13335v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1912.13335v2-abstract-full" style="display: none;"> Accurate quantification of pulmonary nodules can greatly assist the early diagnosis of lung cancer, which can enhance patient survival possibilities. A number of nodule segmentation techniques have been proposed, however, all of the existing techniques rely on radiologist 3-D volume of interest (VOI) input or use the constant region of interest (ROI) and only investigate the presence of nodule voxels within the given VOI. Such approaches restrain the solutions to investigate the nodule presence outside the given VOI and also include the redundant structures into VOI, which may lead to inaccurate nodule segmentation. In this work, a novel semi-automated approach for 3-D segmentation of nodule in volumetric computerized tomography (CT) lung scans has been proposed. 
The proposed technique can be divided into two stages. In the first stage, it takes a 2-D ROI containing the nodule as input and performs a patch-wise investigation along the axial axis with a novel adaptive ROI strategy. The adaptive ROI algorithm enables the solution to dynamically select the ROI for the surrounding slices to investigate the presence of the nodule using a deep residual U-Net architecture. The first stage provides an initial estimate of the nodule, which is further utilized to extract the VOI. In the second stage, the extracted VOI is further investigated along the coronal and sagittal axes with two different networks, and finally all the estimated masks are fed into a consensus module to produce the final volumetric segmentation of the nodule. The proposed approach has been rigorously evaluated on the LIDC dataset, which is the largest publicly available dataset. The results suggest that the approach is significantly more robust and accurate than the previous state-of-the-art techniques. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1912.13335v2-abstract-full').style.display = 'none'; document.getElementById('1912.13335v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 February, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 31 December, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">The manuscript is currently under review and copyright shall be transferred to the publisher upon acceptance</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1910.11400">arXiv:1910.11400</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1910.11400">pdf</a>, <a href="https://arxiv.org/format/1910.11400">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> Meta-learning for robust child-adult classification from speech </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Koluguri%2C+N+R">Nithin Rao Koluguri</a>, <a href="/search/cs?searchtype=author&amp;query=Kumar%2C+M">Manoj Kumar</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+S+H">So Hyun Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Lord%2C+C">Catherine Lord</a>, <a href="/search/cs?searchtype=author&amp;query=Narayanan%2C+S">Shrikanth Narayanan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1910.11400v2-abstract-short" style="display: inline;"> Computational modeling of naturalistic conversations in clinical applications has seen growing interest in the past decade. An important use-case involves child-adult interactions within the autism diagnosis and intervention domain.
In this paper, we address a specific sub-problem of speaker diarization, namely child-adult speaker classification in such dyadic conversations with specified roles. T&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1910.11400v2-abstract-full').style.display = 'inline'; document.getElementById('1910.11400v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1910.11400v2-abstract-full" style="display: none;"> Computational modeling of naturalistic conversations in clinical applications has seen growing interest in the past decade. An important use-case involves child-adult interactions within the autism diagnosis and intervention domain. In this paper, we address a specific sub-problem of speaker diarization, namely child-adult speaker classification in such dyadic conversations with specified roles. Training a speaker classification system robust to speaker and channel conditions is challenging due to the inherent variability of speech among children and adult interlocutors. In this work, we propose the use of meta-learning, in particular prototypical networks, which optimize a metric space across multiple tasks. By modeling every child-adult pair in the training set as a separate task during meta-training, we learn a representation with improved generalizability compared to conventional supervised learning. We demonstrate improvements over state-of-the-art speaker embeddings (x-vectors) under two evaluation settings: weakly supervised classification (up to 14.53% relative improvement in F1-scores) and clustering (up to 9.66% relative improvement in cluster purity). Our results show that protonets can potentially extract robust speaker embeddings for child-adult classification from speech. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1910.11400v2-abstract-full').style.display = 'none'; document.getElementById('1910.11400v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 October, 2019; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 October, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2019.
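<p class="is-size-7">A minimal numpy sketch of the prototypical-network episode structure this abstract describes: class prototypes are the mean embeddings of support examples, and queries are classified by a softmax over negative squared distances to the prototypes. The embeddings here are random stand-ins for a trained speech encoder's output.</p> <pre><code>
import numpy as np

rng = np.random.default_rng(0)
emb_dim = 16

# One episode = one child-adult pair treated as its own task: 5 support
# utterance embeddings per class and 3 query embeddings (all hypothetical).
support = {"child": rng.normal(0.0, 1.0, (5, emb_dim)),
           "adult": rng.normal(0.5, 1.0, (5, emb_dim))}
queries = rng.normal(0.5, 1.0, (3, emb_dim))

# Prototype = mean support embedding of each class.
classes = list(support)
protos = np.stack([support[c].mean(axis=0) for c in classes])

# Classify queries by softmax over negative squared Euclidean distances.
d2 = ((queries[:, None, :] - protos[None, :, :]) ** 2).sum(axis=2)
logits = -d2
probs = np.exp(logits) / np.exp(logits).sum(axis=1, keepdims=True)
for q, p in enumerate(probs):
    print(q, classes[int(np.argmax(p))], np.round(p, 3))
</code></pre>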
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1910.11398">arXiv:1910.11398</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1910.11398">pdf</a>, <a href="https://arxiv.org/ps/1910.11398">ps</a>, <a href="https://arxiv.org/format/1910.11398">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> Speaker diarization using latent space clustering in generative adversarial network </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Pal%2C+M">Monisankha Pal</a>, <a href="/search/cs?searchtype=author&amp;query=Kumar%2C+M">Manoj Kumar</a>, <a href="/search/cs?searchtype=author&amp;query=Peri%2C+R">Raghuveer Peri</a>, <a href="/search/cs?searchtype=author&amp;query=Park%2C+T+J">Tae Jin Park</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+S+H">So Hyun Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Lord%2C+C">Catherine Lord</a>, <a href="/search/cs?searchtype=author&amp;query=Bishop%2C+S">Somer Bishop</a>, <a href="/search/cs?searchtype=author&amp;query=Narayanan%2C+S">Shrikanth Narayanan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1910.11398v1-abstract-short" style="display: inline;"> In this work, we propose deep latent space clustering for speaker diarization using generative adversarial network (GAN) backprojection with the help of an encoder network. The proposed diarization system is trained jointly with GAN loss, latent variable recovery loss, and a clustering-specific loss. It uses x-vector speaker embeddings at the input, while the latent variables are sampled from a co&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1910.11398v1-abstract-full').style.display = 'inline'; document.getElementById('1910.11398v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1910.11398v1-abstract-full" style="display: none;"> In this work, we propose deep latent space clustering for speaker diarization using generative adversarial network (GAN) backprojection with the help of an encoder network. The proposed diarization system is trained jointly with GAN loss, latent variable recovery loss, and a clustering-specific loss. It uses x-vector speaker embeddings at the input, while the latent variables are sampled from a combination of continuous random variables and discrete one-hot encoded variables using the original speaker labels. We benchmark our proposed system on the AMI meeting corpus and two child-clinician interaction corpora (ADOS and BOSCC) from the autism diagnosis domain. ADOS and BOSCC contain diagnostic and treatment outcome sessions, respectively, obtained in clinical settings for verbal children and adolescents with autism. Experimental results show that our proposed system significantly outperforms the state-of-the-art x-vector-based diarization system on these databases.
Further, we perform embedding fusion with x-vectors to achieve a relative DER improvement of 31%, 36%, and 49% on the AMI eval, ADOS, and BOSCC corpora, respectively, when compared to the x-vector baseline using oracle speech segmentation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1910.11398v1-abstract-full').style.display = 'none'; document.getElementById('1910.11398v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 October, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Submitted to ICASSP 2020</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1908.05007">arXiv:1908.05007</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1908.05007">pdf</a>, <a href="https://arxiv.org/format/1908.05007">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TASE.2019.2935792">10.1109/TASE.2019.2935792 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Robust Translational Force Control of Multi-Rotor UAV for Precise Acceleration Tracking </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lee%2C+S+J">Seung Jae Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+S+H">Seung Hyun Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+H+J">H. Jin Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1908.05007v1-abstract-short" style="display: inline;"> In this paper, we introduce a translational force control method with disturbance observer (DOB)-based force disturbance cancellation for precise three-dimensional acceleration control of a multi-rotor UAV. The acceleration control of the multi-rotor requires conversion of the desired acceleration signal to the desired roll, pitch, and total thrust. But because the attitude dynamics and the thrust&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1908.05007v1-abstract-full').style.display = 'inline'; document.getElementById('1908.05007v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1908.05007v1-abstract-full" style="display: none;"> In this paper, we introduce a translational force control method with disturbance observer (DOB)-based force disturbance cancellation for precise three-dimensional acceleration control of a multi-rotor UAV. The acceleration control of the multi-rotor requires conversion of the desired acceleration signal to the desired roll, pitch, and total thrust.
arXiv:1908.05007 (https://arxiv.org/abs/1908.05007) [pdf, other]
Subjects: cs.RO (Robotics); eess.SY (Systems and Control)
DOI: 10.1109/TASE.2019.2935792 (https://doi.org/10.1109/TASE.2019.2935792)
Title: Robust Translational Force Control of Multi-Rotor UAV for Precise Acceleration Tracking
Authors: Seung Jae Lee, Seung Hyun Kim, H. Jin Kim
Abstract: In this paper, we introduce a translational force control method with disturbance observer (DOB)-based force disturbance cancellation for precise three-dimensional acceleration control of a multi-rotor UAV. The acceleration control of the multi-rotor requires conversion of the desired acceleration signal to the desired roll, pitch, and total thrust. But because the attitude dynamics and the thrust dynamics are different, simple kinematic signal conversion without consideration of this difference can cause serious performance degradation in acceleration tracking. Unlike most existing translational force control techniques, which are based on such simple inversion, our new method controls the acceleration of the multi-rotor more precisely by accounting for the dynamics of the multi-rotor during the kinematic inversion. By combining the DOB with the translational force system that includes the improved conversion technique, we achieve robustness against the external force disturbances that hinder accurate acceleration control. A mu-analysis is performed to ensure the robust stability of the overall closed-loop system, considering the combined effect of various possible model uncertainties. Both simulation and experiment are conducted to validate the proposed technique and confirm satisfactory tracking of the desired acceleration of the multi-rotor.
Submitted: 14 August, 2019; originally announced August 2019.
Comments: 11 pages, 14 figures, accepted in the T-ASE Journal on Aug. 10th, 2019
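A one-dimensional toy sketch of the disturbance-observer idea named in this abstract (not the paper's controller or UAV model): estimate the force disturbance as the low-pass-filtered gap between measured acceleration and the nominal model, then cancel it in the force command. Mass, gains, and the disturbance profile are invented for illustration.

    import numpy as np

    m, dt, tau = 1.5, 0.01, 0.05      # nominal mass, time step, DOB filter constant
    a_des, d_hat = 1.0, 0.0           # desired acceleration, disturbance estimate
    d_true = lambda t: 2.0 * np.sin(2 * np.pi * 0.5 * t)  # unknown external force

    for k in range(500):
        t = k * dt
        f_cmd = m * a_des - d_hat      # command cancels the estimated disturbance
        a_meas = (f_cmd + d_true(t)) / m   # plant: true dynamics include disturbance
        # DOB update: residual force m*a_meas - f_cmd equals the true disturbance,
        # tracked through a first-order low-pass filter.
        d_hat += (dt / tau) * ((m * a_meas - f_cmd) - d_hat)
    print("final acceleration error:", abs(a_meas - a_des))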
arXiv:1807.08903 (https://arxiv.org/abs/1807.08903) [pdf, ps, other]
Subjects: cs.IT (Information Theory)
Title: Traffic-Aware Backscatter Communications in Wireless-Powered Heterogeneous Networks
Authors: Sung Hoon Kim, Dong In Kim
Abstract: With emerging Internet-of-Things services, massive machine-to-machine (M2M) communication will be deployed on top of human-to-human (H2H) communication in the near future. Due to the coexistence of M2M and H2H communications, the performance of the M2M (i.e., secondary) network depends largely on the H2H (i.e., primary) network. In this paper, we propose ambient backscatter communication for the M2M network that exploits the energy (signal) sources of the H2H network, referring to traffic applications and popularity. In order to maximize the harvesting and transmission opportunities offered by the varying traffic sources of the H2H network, we adopt a Bayesian nonparametric (BNP) learning algorithm to classify traffic applications (patterns) for the secondary user (SU). We then analyze the performance of the SU using a stochastic geometry approach, based on a criterion for optimal traffic pattern selection. Results are presented to validate the performance of the proposed BNP classification algorithm and the criterion, as well as the impact of traffic sources and popularity.
Submitted: 24 July, 2018; originally announced July 2018.
Comments: 14 pages, 10 figures
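As a loose stand-in for the Bayesian nonparametric classification step mentioned above, a DP-means sketch (the small-variance limit of Dirichlet-process mixtures, not the paper's algorithm): it clusters traffic feature vectors without fixing the number of patterns in advance. The toy features and penalty lambda are invented.

    import numpy as np

    rng = np.random.default_rng(1)
    X = np.vstack([rng.normal(loc, 0.1, size=(30, 2))
                   for loc in ([0, 0], [2, 2], [4, 0])])  # three toy traffic patterns
    lam = 1.0                                             # new-cluster penalty
    centers = [X[0]]
    for _ in range(10):                                   # a few Lloyd-style sweeps
        assign = []
        for x in X:
            d = [float(np.sum((x - c) ** 2)) for c in centers]
            if min(d) > lam:                              # too far: open a new pattern
                centers.append(x.copy())
                assign.append(len(centers) - 1)
            else:
                assign.append(int(np.argmin(d)))
        assign = np.array(assign)
        centers = [X[assign == k].mean(axis=0)            # recenter, drop empties
                   for k in range(len(centers)) if np.any(assign == k)]
    print("patterns found:", len(centers))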
arXiv:1710.03299 (https://arxiv.org/abs/1710.03299) [pdf]
Subjects: cs.HC (Human-Computer Interaction); cs.CY (Computers and Society)
Title: A Review on the Applications of Crowdsourcing in Human Pathology
Authors: Roshanak Alialy, Sasan Tavakkol, Elham Tavakkol, Amir Ghorbani-Aghbologhi, Alireza Ghaffarieh, Seon Ho Kim, Cyrus Shahabi
Abstract: The advent of digital pathology has introduced new avenues of diagnostic medicine. Among them, crowdsourcing has attracted researchers' attention in recent years, allowing them to engage thousands of untrained individuals in research and diagnosis. While several articles exist in this regard, prior works have not collectively documented them. We therefore aim to review the applications of crowdsourcing in human pathology in a semi-systematic manner. We first introduce a novel method for a systematic search of the literature. Utilizing this method, we then collect hundreds of articles and screen them against a pre-defined set of criteria. Furthermore, we crowdsource part of the screening process to examine another potential application of crowdsourcing. Finally, we review the selected articles and characterize the prior uses of crowdsourcing in pathology.
Submitted: 20 November, 2017 (v2); v1 submitted 9 October, 2017; originally announced October 2017.
arXiv:1705.02009 (https://arxiv.org/abs/1705.02009) [pdf, ps, other]
Subjects: cs.IR (Information Retrieval); cs.LG (Machine Learning)
Title: On Identifying Disaster-Related Tweets: Matching-based or Learning-based?
Authors: Hien To, Sumeet Agrawal, Seon Ho Kim, Cyrus Shahabi
Abstract: Social media such as tweets are emerging as platforms contributing to situational awareness during disasters. Information shared on Twitter by both the affected population (e.g., requesting assistance, warning) and those outside the impact zone (e.g., providing assistance) would help first responders, decision makers, and the public understand the situation first-hand. Effective use of such information requires timely selection and analysis of tweets that are relevant to a particular disaster. Even though abundant tweets are promising as a data source, it is challenging to automatically identify relevant messages, since tweets are short and unstructured, resulting in unsatisfactory classification performance of conventional learning-based approaches. Thus, we propose a simple yet effective algorithm to identify relevant messages based on matching keywords and hashtags, and provide a comparison between matching-based and learning-based approaches. To evaluate the two approaches, we put them into a framework specifically proposed for analyzing disaster-related tweets. Analysis results on eleven datasets with various disaster types show that our technique provides relevant tweets of higher quality and more interpretable results for sentiment analysis tasks when compared to the learning-based approach.
Submitted: 4 May, 2017; originally announced May 2017.
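A minimal sketch of the matching-based idea (the term list is invented for illustration; the paper's curated keyword and hashtag sets are not reproduced here): a tweet is deemed relevant if it contains any keyword or hashtag for the disaster being tracked.

    # Hypothetical keyword/hashtag set for one disaster event.
    DISASTER_TERMS = {"earthquake", "flood", "wildfire", "#sandy", "evacuation"}

    def is_relevant(tweet: str) -> bool:
        # Tokenize naively, strip trailing punctuation, match against the set.
        tokens = {w.strip(".,!?").lower() for w in tweet.split()}
        return bool(tokens & DISASTER_TERMS)

    print(is_relevant("Major flood on 5th street, need evacuation help!"))  # True
    print(is_relevant("Great coffee this morning"))                         # False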
arXiv:1502.06654 (https://arxiv.org/abs/1502.06654) [pdf, ps, other]
Subjects: cs.IT (Information Theory)
DOI: 10.1109/LCOMM.2015.2398866 (https://doi.org/10.1109/LCOMM.2015.2398866)
Title: Variable-Length Feedback Codes under a Strict Delay Constraint
Authors: Seong Hwan Kim, Dan Keun Sung, Tho Le-Ngoc
Abstract: We study variable-length feedback (VLF) codes under a strict delay constraint to maximize their average transmission rate (ATR) in a discrete memoryless channel (DMC) while considering periodic decoding attempts. We first derive a lower bound on the maximum achievable ATR, and confirm that the VLF code can outperform non-feedback codes with a larger delay constraint. We show that, for a given decoding period, as the strict delay constraint L increases, the gap between the ATR of the VLF code and the DMC capacity scales at most on the order of O(L^{-1}), instead of O(L^{-1/2}) for non-feedback codes, as shown in Polyanskiy et al. ["Channel coding rate in the finite blocklength regime," IEEE Trans. Inf. Theory, vol. 56, no. 5, pp. 2307-2359, May 2010]. We also develop an approximation indicating that, for a given L, the achievable ATR increases as the decoding period decreases.
Submitted: 23 February, 2015; originally announced February 2015.
Comments: 5 pages, 1 figure, accepted for publication in IEEE Communications Letters
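Writing C for the DMC capacity and R_VLF(L), R_NF(L) for the best ATRs of VLF and non-feedback codes under delay constraint L (notation introduced here for illustration, not taken from the paper), the stated scaling can be summarized as:

    \[
      C - R_{\mathrm{VLF}}(L) = O\!\left(L^{-1}\right),
      \qquad
      C - R_{\mathrm{NF}}(L) = O\!\left(L^{-1/2}\right).
    \]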
arXiv:1308.6217 (https://arxiv.org/abs/1308.6217) [pdf, other]
Subjects: cs.OH (Other Computer Science)
DOI: 10.2514/1.D0067 (https://doi.org/10.2514/1.D0067)
Title: Numerical Analysis of Gate Conflict Duration and Passenger Transit Time in Airport
Authors: Sang Hyun Kim, Eric Feron
Abstract: Robustness is as important as efficiency in air transportation. All components of the air traffic system are connected to form an interactive network, so a disturbance that occurs in one component, for example a severe delay at an airport, can influence the entire network. Delays are easily propagated between flights through gates, but the propagation can be reduced if gate assignments are robust against stochastic delays. In this paper, we analyze gate delays and suggest an approach that assigns gates so that they are robust against stochastic delays. We extract an example flight schedule from a data source and generate schedules with increased traffic to analyze how compact flight schedules affect the robustness of gate assignment. Simulation results show that our approach improves the robustness of gate assignment. In particular, the robust gate assignment reduces the average duration of gate conflicts by 96.3% and the number of gate conflicts by 96.7% compared to the baseline assignment. However, the robust gate assignment results in longer transit times for passengers, and a trade-off between the robustness of gate assignment and passenger transit time is presented.
Submitted: 28 August, 2013; originally announced August 2013.
Comments: Submitted to Transportation Research Part B, and presented in part at the AIAA Guidance, Navigation, and Control Conference in 2011
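An illustrative computation of gate conflicts and their durations (the schedule values are invented): a conflict occurs when a flight reaches its gate before the previous flight at that gate has departed, and its duration is the overlap.

    from collections import defaultdict

    # (gate, gate-arrival time, gate-departure time) in minutes.
    flights = [("A1", 0, 45), ("A1", 40, 90), ("B2", 10, 60), ("B2", 70, 120)]

    by_gate = defaultdict(list)
    for gate, arr, dep in flights:
        by_gate[gate].append((arr, dep))

    conflicts = []
    for gate, slots in by_gate.items():
        slots.sort()
        for (a1, d1), (a2, d2) in zip(slots, slots[1:]):
            if a2 < d1:                       # next arrival before prior departure
                conflicts.append((gate, d1 - a2))
    print(conflicts)                          # [('A1', 5)]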
arXiv:1306.3429 (https://arxiv.org/abs/1306.3429) [pdf, other]
Subjects: cs.OH (Other Computer Science)
DOI: 10.1109/TITS.2013.2285499 (https://doi.org/10.1109/TITS.2013.2285499)
Title: Impact of Gate Assignment on Gate-Holding Departure Control Strategies
Authors: Sang Hyun Kim, Eric Feron
Abstract: Gate holding reduces congestion by reducing the number of aircraft present on the airport surface at any time, while not starving the runway. Because some departing flights are held at gates, there is a possibility that arriving flights cannot access the gates and have to wait until the gates are cleared; this is called a gate conflict. Robust gate assignment minimizes gate conflicts by assigning gates to aircraft so as to maximize the time gap between two consecutive flights at the same gate; it makes the gate assignment robust, but passengers may walk longer to transfer between flights. In order to simulate the airport departure process, a queuing model is introduced. The model is calibrated and validated with actual data from New York LaGuardia Airport (LGA) and a U.S. hub airport. The model then simulates the airport departure process under the current gate assignment and a robust gate assignment to assess the impact of gate assignment on gate-holding departure control. The results show that the robust gate assignment reduces the number of gate conflicts caused by gate holding compared to the current gate assignment. Therefore, robust gate assignment can be combined with gate-holding departure control to improve operations at congested airports with limited gate resources.
Submitted: 14 June, 2013; originally announced June 2013.
Comments: Submitted to IEEE Transactions on Intelligent Transportation Systems
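A toy discrete-time sketch of gate-holding departure control (not the paper's calibrated queuing model; all rates and the threshold are invented): pushbacks are released only while the surface count is below a threshold, so excess aircraft wait at their gates rather than in the physical departure queue.

    import random

    random.seed(0)
    T, threshold, service_p = 600, 8, 0.25   # minutes; max aircraft on surface;
                                             # per-minute runway departure prob.
    gate_queue, surface = 0, 0
    for t in range(T):
        if random.random() < 0.3:            # a flight becomes ready to push back
            gate_queue += 1
        if gate_queue and surface < threshold:
            gate_queue -= 1                  # release one pushback; gate holding
            surface += 1                     # keeps the rest at their gates
        if surface and random.random() < service_p:
            surface -= 1                     # a departure takes off
    print("held at gates:", gate_queue, "| on surface:", surface)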
arXiv:1306.3426 (https://arxiv.org/abs/1306.3426) [pdf, other]
Subjects: cs.OH (Other Computer Science)
DOI: 10.1109/TITS.2013.2286271 (https://doi.org/10.1109/TITS.2013.2286271)
Title: Valuating Surface Surveillance Technology for Collaborative Multiple-Spot Control of Airport Departure Operations
Authors: Pierrick Burgain, Sang Hyun Kim, Eric Feron
Abstract: Airport departure operations are a source of airline delays and passenger frustration. Excessive surface traffic increases controller and pilot workload; it is also a source of increased emissions and delays, and does not yield improved runway throughput. Leveraging the extensive past research on airport departure management, this paper explores the environmental and safety benefits that improved surveillance technologies can bring in the context of gate- or spot-release strategies. The paper shows that improved surveillance technologies can yield a 4% to 6% reduction in aircraft on taxiways, and therefore in emissions, in addition to the savings currently observed from the threshold strategies under evaluation at Boston Logan Airport and other busy airports during congested periods. These calculated benefits contrast sharply with our previous work, which relied on simplified airport ramp areas with a single departure spot, and where fewer environmental and economic benefits of advanced surface surveillance systems could be established. Our work is illustrated by its application to New York LaGuardia and Seattle-Tacoma airports.
Submitted: 14 June, 2013; originally announced June 2013.
Comments: Submitted to IEEE Transactions on Intelligent Transportation Systems. arXiv admin note: substantial text overlap with arXiv:1102.2673
arXiv:1301.3535 (https://arxiv.org/abs/1301.3535) [pdf, other]
Subjects: eess.SY (Systems and Control); cs.AI (Artificial Intelligence)
DOI: 10.2514/1.D0079 (https://doi.org/10.2514/1.D0079)
Title: Airport Gate Scheduling for Passengers, Aircraft, and Operation
Authors: Sang Hyun Kim, Eric Feron, John-Paul Clarke, Aude Marzuoli, Daniel Delahaye
Abstract: Passengers' experience is becoming a key metric to evaluate the air transportation system's performance. Efficient and robust tools to handle airport operations are needed, along with a better understanding of passengers' interests and concerns. Among various airport operations, this paper studies airport gate scheduling for improved passenger experience. Three objectives accounting for passengers, aircraft, and operation are presented. Trade-offs between these objectives are analyzed, and a balancing objective function is proposed. The results show that the balanced objective can improve the efficiency of traffic flow in passenger terminals and on ramps, as well as the robustness of gate operations.
Submitted: 15 January, 2013; originally announced January 2013.
Comments: Submitted to the tenth USA/Europe ATM 2013 seminar
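A schematic sketch of a balanced gate-assignment objective of the kind this abstract describes (the weights and cost terms are invented; the paper defines its own three objectives for passengers, aircraft, and operations):

    def balanced_cost(assignment, w_pax=1.0, w_taxi=1.0, w_robust=1.0):
        """assignment: list of per-flight dicts with illustrative cost terms."""
        pax = sum(f["walk_time"] for f in assignment)        # passenger transit
        taxi = sum(f["taxi_time"] for f in assignment)       # aircraft movement
        robust = -min(f["gap_to_next"] for f in assignment)  # penalize small gaps
        return w_pax * pax + w_taxi * taxi + w_robust * robust

    demo = [{"walk_time": 12, "taxi_time": 9, "gap_to_next": 15},
            {"walk_time": 7, "taxi_time": 14, "gap_to_next": 30}]
    print(balanced_cost(demo))   # 27.0: lower is better across all three terms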
