<!-- Scrape artifact preserved as a comment (plain text before the doctype
     would force quirks mode; a comment here is valid and keeps the
     doctype on the next line effective):
CINXE.COM
Search | arXiv e-print repository
-->
<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script 
src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. 
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" 
role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 55 results for author: <span class="mathjax">Hwang, Y</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Hwang%2C+Y">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Hwang, Y"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Hwang%2C+Y&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option 
value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Hwang, Y"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Hwang%2C+Y&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Hwang%2C+Y&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Hwang%2C+Y&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.04273">arXiv:2411.04273</a> <span> [<a href="https://arxiv.org/pdf/2411.04273">pdf</a>, <a href="https://arxiv.org/format/2411.04273">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Understanding Generative AI in Robot Logic Parametrization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a 
href="/search/cs?searchtype=author&query=Hwang%2C+Y">Yuna Hwang</a>, <a href="/search/cs?searchtype=author&query=Sato%2C+A+J">Arissa J. Sato</a>, <a href="/search/cs?searchtype=author&query=Praveena%2C+P">Pragathi Praveena</a>, <a href="/search/cs?searchtype=author&query=White%2C+N+T">Nathan Thomas White</a>, <a href="/search/cs?searchtype=author&query=Mutlu%2C+B">Bilge Mutlu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.04273v1-abstract-short" style="display: inline;"> Leveraging generative AI (for example, Large Language Models) for language understanding within robotics opens up possibilities for LLM-driven robot end-user development (EUD). Despite the numerous design opportunities it provides, little is understood about how this technology can be utilized when constructing robot program logic. In this paper, we outline the background in capturing natural lang… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04273v1-abstract-full').style.display = 'inline'; document.getElementById('2411.04273v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.04273v1-abstract-full" style="display: none;"> Leveraging generative AI (for example, Large Language Models) for language understanding within robotics opens up possibilities for LLM-driven robot end-user development (EUD). Despite the numerous design opportunities it provides, little is understood about how this technology can be utilized when constructing robot program logic. In this paper, we outline the background in capturing natural language end-user intent and summarize previous use cases of LLMs within EUD. 
Taking the context of filmmaking as an example, we explore how a cinematography practitioner's intent to film a certain scene can be articulated using natural language, captured by an LLM, and further parametrized as low-level robot arm movement. We explore the capabilities of an LLM interpreting end-user intent and mapping natural language to predefined, cross-modal data in the process of iterative program development. We conclude by suggesting future opportunities for domain exploration beyond cinematography to support language-driven robotic camera navigation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04273v1-abstract-full').style.display = 'none'; document.getElementById('2411.04273v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, 2 figures. 
Paper presented at the End-User Development for Human-Robot Interaction (EUD4HRI) Workshop, part of the 19th Annual ACM/IEEE International Conference on Human Robot Interaction (HRI, 2024)</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.2.9; J.7 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.23811">arXiv:2410.23811</a> <span> [<a href="https://arxiv.org/pdf/2410.23811">pdf</a>, <a href="https://arxiv.org/format/2410.23811">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Complexity">cs.CC</span> </div> </div> <p class="title is-5 mathjax"> UniqueQMA vs QMA: oracle separation and eigenstate thermalization hypothesis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Anshu%2C+A">Anurag Anshu</a>, <a href="/search/cs?searchtype=author&query=Haferkamp%2C+J">Jonas Haferkamp</a>, <a href="/search/cs?searchtype=author&query=Hwang%2C+Y">Yeongwoo Hwang</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+Q+T">Quynh T. Nguyen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.23811v1-abstract-short" style="display: inline;"> We study the long-standing open question of the power of unique witness in quantum protocols, which asks if UniqueQMA, a variant of QMA whose accepting witness space is 1-dimensional, is equal to QMA. We show a quantum oracle separation between UniqueQMA and QMA via an extension of the Aaronson-Kuperberg's QCMA vs QMA oracle separation. 
In particular, we show that any UniqueQMA protocol must make… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.23811v1-abstract-full').style.display = 'inline'; document.getElementById('2410.23811v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.23811v1-abstract-full" style="display: none;"> We study the long-standing open question of the power of unique witness in quantum protocols, which asks if UniqueQMA, a variant of QMA whose accepting witness space is 1-dimensional, is equal to QMA. We show a quantum oracle separation between UniqueQMA and QMA via an extension of the Aaronson-Kuperberg's QCMA vs QMA oracle separation. In particular, we show that any UniqueQMA protocol must make $Ω(\sqrt{D})$ queries to a subspace phase oracle of unknown dimension $\leq D$ to "find" the subspace. This presents an obstacle to relativizing techniques in resolving this question (unlike its classical analogue - the Valiant-Vazirani theorem - which is essentially a black-box reduction) and suggests the need to study the structure of the ground space of local Hamiltonians in distilling a potential unique witness. Our techniques also yield a quantum oracle separation between QXC, the class characterizing quantum approximate counting, and QMA. Very few structural properties are known that place the complexity of local Hamiltonians in UniqueQMA. We expand this set of properties by showing that the ground energy of local Hamiltonians that satisfy the eigenstate thermalization hypothesis (ETH) can be estimated through a UniqueQMA protocol. Specifically, our protocol can be viewed as a quantum expander test in a low energy subspace of the Hamiltonian and verifies a unique entangled state in two copies of the subspace. 
This allows us to conclude that if UniqueQMA $\neq$ QMA, then QMA-hard Hamiltonians must violate ETH under adversarial perturbations (more accurately, under the quantum PCP conjecture if ETH only applies to extensive energy subspaces). Our results serve as evidence that chaotic local Hamiltonians, such as the SYK model, contain polynomial verifiable quantum states in their low energy regime and may be simpler than general local Hamiltonians if UniqueQMA $\neq$ QMA. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.23811v1-abstract-full').style.display = 'none'; document.getElementById('2410.23811v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.20774">arXiv:2410.20774</a> <span> [<a href="https://arxiv.org/pdf/2410.20774">pdf</a>, <a href="https://arxiv.org/format/2410.20774">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Are LLM-Judges Robust to Expressions of Uncertainty? 
Investigating the effect of Epistemic Markers on LLM-based Evaluation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lee%2C+D">Dongryeol Lee</a>, <a href="/search/cs?searchtype=author&query=Hwang%2C+Y">Yerin Hwang</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+Y">Yongil Kim</a>, <a href="/search/cs?searchtype=author&query=Park%2C+J">Joonsuk Park</a>, <a href="/search/cs?searchtype=author&query=Jung%2C+K">Kyomin Jung</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.20774v1-abstract-short" style="display: inline;"> In line with the principle of honesty, there has been a growing effort to train large language models (LLMs) to generate outputs containing epistemic markers. However, evaluation in the presence of epistemic markers has been largely overlooked, raising a critical question: Could the use of epistemic markers in LLM-generated outputs lead to unintended negative consequences? To address this, we pres… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.20774v1-abstract-full').style.display = 'inline'; document.getElementById('2410.20774v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.20774v1-abstract-full" style="display: none;"> In line with the principle of honesty, there has been a growing effort to train large language models (LLMs) to generate outputs containing epistemic markers. However, evaluation in the presence of epistemic markers has been largely overlooked, raising a critical question: Could the use of epistemic markers in LLM-generated outputs lead to unintended negative consequences? 
To address this, we present EMBER, a benchmark designed to assess the robustness of LLM-judges to epistemic markers in both single and pairwise evaluation settings. Our findings, based on evaluations using EMBER, reveal that all tested LLM-judges, including GPT-4o, show a notable lack of robustness in the presence of epistemic markers. Specifically, we observe a negative bias toward epistemic markers, with a stronger bias against markers expressing uncertainty. This suggests that LLM-judges are influenced by the presence of these markers and do not focus solely on the correctness of the content. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.20774v1-abstract-full').style.display = 'none'; document.getElementById('2410.20774v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">21 pages, 6 figures, 15 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.19503">arXiv:2410.19503</a> <span> [<a href="https://arxiv.org/pdf/2410.19503">pdf</a>, <a href="https://arxiv.org/format/2410.19503">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> SWITCH: Studying with Teacher for Knowledge Distillation of Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Koo%2C+J">Jahyun Koo</a>, <a href="/search/cs?searchtype=author&query=Hwang%2C+Y">Yerin Hwang</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+Y">Yongil Kim</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+T">Taegwan Kang</a>, <a href="/search/cs?searchtype=author&query=Bae%2C+H">Hyunkyung Bae</a>, <a href="/search/cs?searchtype=author&query=Jung%2C+K">Kyomin Jung</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.19503v1-abstract-short" style="display: inline;"> Despite the success of Large Language Models (LLMs), they still face challenges related to high inference costs and memory requirements. To address these issues, Knowledge Distillation (KD) has emerged as a popular method for model compression, with student-generated outputs (SGOs) being particularly notable for reducing the mismatch between training and inference. 
However, SGOs often produce nois… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.19503v1-abstract-full').style.display = 'inline'; document.getElementById('2410.19503v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.19503v1-abstract-full" style="display: none;"> Despite the success of Large Language Models (LLMs), they still face challenges related to high inference costs and memory requirements. To address these issues, Knowledge Distillation (KD) has emerged as a popular method for model compression, with student-generated outputs (SGOs) being particularly notable for reducing the mismatch between training and inference. However, SGOs often produce noisy and biased sequences, which can lead to misguidance from the teacher model, especially in long sequences. To mitigate these challenges, we propose SWITCH (Studying WIth TeaCHer for Knowledge Distillation), a novel approach that strategically incorporates the teacher model during the student's sequence generation. SWITCH identifies discrepancies between the token probabilities of the teacher and student models, allowing the teacher to intervene selectively, particularly in long sequences that are more prone to teacher misguidance. Extensive experimental results across three model families and five instruction-following datasets show that SWITCH surpasses traditional KD methods, particularly excelling in the generation of long sequential data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.19503v1-abstract-full').style.display = 'none'; document.getElementById('2410.19503v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.18385">arXiv:2410.18385</a> <span> [<a href="https://arxiv.org/pdf/2410.18385">pdf</a>, <a href="https://arxiv.org/format/2410.18385">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Link, Synthesize, Retrieve: Universal Document Linking for Zero-Shot Information Retrieval </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hwang%2C+D+Y">Dae Yon Hwang</a>, <a href="/search/cs?searchtype=author&query=Taha%2C+B">Bilal Taha</a>, <a href="/search/cs?searchtype=author&query=Pande%2C+H">Harshit Pande</a>, <a href="/search/cs?searchtype=author&query=Nechaev%2C+Y">Yaroslav Nechaev</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.18385v2-abstract-short" style="display: inline;"> Despite the recent advancements in information retrieval (IR), zero-shot IR remains a significant challenge, especially when dealing with new domains, languages, and newly-released use cases that lack historical query traffic from existing users. For such cases, it is common to use query augmentations followed by fine-tuning pre-trained models on the document data paired with synthetic queries. 
In… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18385v2-abstract-full').style.display = 'inline'; document.getElementById('2410.18385v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.18385v2-abstract-full" style="display: none;"> Despite the recent advancements in information retrieval (IR), zero-shot IR remains a significant challenge, especially when dealing with new domains, languages, and newly-released use cases that lack historical query traffic from existing users. For such cases, it is common to use query augmentations followed by fine-tuning pre-trained models on the document data paired with synthetic queries. In this work, we propose a novel Universal Document Linking (UDL) algorithm, which links similar documents to enhance synthetic query generation across multiple datasets with different characteristics. UDL leverages entropy for the choice of similarity models and named entity recognition (NER) for the link decision of documents using similarity scores. Our empirical studies demonstrate the effectiveness and universality of the UDL across diverse datasets and IR models, surpassing state-of-the-art methods in zero-shot cases. The developed code for reproducibility is included in https://github.com/eoduself/UDL <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18385v2-abstract-full').style.display = 'none'; document.getElementById('2410.18385v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted for publication at EMNLP 2024 Main Conference</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.10495">arXiv:2410.10495</a> <span> [<a href="https://arxiv.org/pdf/2410.10495">pdf</a>, <a href="https://arxiv.org/format/2410.10495">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Complexity">cs.CC</span> </div> </div> <p class="title is-5 mathjax"> Commuting Local Hamiltonians Beyond 2D </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Bostanci%2C+J">John Bostanci</a>, <a href="/search/cs?searchtype=author&query=Hwang%2C+Y">Yeongwoo Hwang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.10495v3-abstract-short" style="display: inline;"> Commuting local Hamiltonians provide a testing ground for studying many of the most interesting open questions in quantum information theory, including the quantum PCP conjecture and the existence of area laws. Although they are a simplified model of quantum computation, the status of the commuting local Hamiltonian problem remains largely unknown. 
A number of works have shown that increasingly ex… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.10495v3-abstract-full').style.display = 'inline'; document.getElementById('2410.10495v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.10495v3-abstract-full" style="display: none;"> Commuting local Hamiltonians provide a testing ground for studying many of the most interesting open questions in quantum information theory, including the quantum PCP conjecture and the existence of area laws. Although they are a simplified model of quantum computation, the status of the commuting local Hamiltonian problem remains largely unknown. A number of works have shown that increasingly expressive families of commuting local Hamiltonians admit completely classical verifiers. Despite intense work, the largest class of commuting local Hamiltonians we can place in NP are those on a square lattice, where each lattice site is a qutrit. Even worse, many of the techniques used to analyze these problems rely heavily on the geometry of the square lattice and the properties of the numbers 2 and 3 as local dimensions. In this work, we present a new technique to analyze the complexity of various families of commuting local Hamiltonians: guided reductions. Intuitively, these are a generalization of typical reduction where the prover provides a guide so that the verifier can construct a simpler Hamiltonian. The core of our reduction is a new rounding technique based on a combination of Jordan's Lemma and the Structure Lemma. Our rounding technique is much more flexible than previous work, and allows us to show that a larger family of commuting local Hamiltonians is in NP, albeit with the restriction that all terms are rank-1. Specifically, we prove the following two results: 1. 
Commuting local Hamiltonians in 2D that are rank-1 are contained in NP, independent of the qudit dimension. Note that this family of commuting local Hamiltonians has no restriction on the local dimension or the locality. 2. We prove that rank-1, 3D commuting Hamiltonians with qudits on edges are in NP. To our knowledge this is the first time a family of 3D commuting local Hamiltonians has been contained in NP. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.10495v3-abstract-full').style.display = 'none'; document.getElementById('2410.10495v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">36 pages, 12 figures. v2: Fixed transparencies in figures. 
v3: Fixed date in title page</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.04909">arXiv:2410.04909</a> <span> [<a href="https://arxiv.org/pdf/2410.04909">pdf</a>, <a href="https://arxiv.org/ps/2410.04909">ps</a>, <a href="https://arxiv.org/format/2410.04909">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Complexity">cs.CC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> </div> </div> <p class="title is-5 mathjax"> Gibbs state preparation for commuting Hamiltonian: Mapping to classical Gibbs sampling </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hwang%2C+Y">Yeongwoo Hwang</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+J">Jiaqing Jiang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.04909v2-abstract-short" style="display: inline;"> Gibbs state preparation, or Gibbs sampling, is a key computational technique extensively used in physics, statistics, and other scientific fields. 
Recent efforts for designing fast mixing Gibbs samplers for quantum Hamiltonians have largely focused on commuting local Hamiltonians (CLHs), a non-trivial subclass of Hamiltonians which include highly entangled systems such as the Toric code and quantu… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04909v2-abstract-full').style.display = 'inline'; document.getElementById('2410.04909v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.04909v2-abstract-full" style="display: none;"> Gibbs state preparation, or Gibbs sampling, is a key computational technique extensively used in physics, statistics, and other scientific fields. Recent efforts for designing fast mixing Gibbs samplers for quantum Hamiltonians have largely focused on commuting local Hamiltonians (CLHs), a non-trivial subclass of Hamiltonians which include highly entangled systems such as the Toric code and quantum double model. Most previous Gibbs samplers relied on simulating the Davies generator, which is a Lindbladian associated with the thermalization process in nature. Instead of using the Davies generator, we design a different Gibbs sampler for various CLHs by giving a reduction to classical Hamiltonians, in the sense that one can efficiently prepare the Gibbs state for some CLH $H$ on a quantum computer as long as one can efficiently do classical Gibbs sampling for the corresponding classical Hamiltonian $H^{(c)}$. We demonstrate that our Gibbs sampler is able to replicate state-of-the-art results as well as prepare the Gibbs state in regimes which were previously unknown, such as the low temperature region, as long as there exists fast mixing Gibbs samplers for the corresponding classical Hamiltonians. Our reductions are as follows. - If $H$ is a 2-local qudit CLH, then $H^{(c)}$ is a 2-local qudit classical Hamiltonian. 
- If $H$ is a 4-local qubit CLH on 2D lattice and there are no classical qubits, then $H^{(c)}$ is a 2-local qudit classical Hamiltonian on a planar graph. As an example, our algorithm can prepare the Gibbs state for the (defected) Toric code at any non-zero temperature in $\mathcal O(n^2)$ time. - If $H$ is a 4-local qubit CLH on 2D lattice and there are classical qubits, assuming that quantum terms are uniformly correctable, then $H^{(c)}$ is a constant-local classical Hamiltonian. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04909v2-abstract-full').style.display = 'none'; document.getElementById('2410.04909v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Fixed typo in abstract and included related work arXiv:2403.14912</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.04751">arXiv:2410.04751</a> <span> [<a href="https://arxiv.org/pdf/2410.04751">pdf</a>, <a href="https://arxiv.org/format/2410.04751">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Intriguing Properties of Large Language and Vision Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lee%2C+Y">Young-Jun Lee</a>, <a href="/search/cs?searchtype=author&query=Ko%2C+B">Byungsoo Ko</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+H">Han-Gyu Kim</a>, <a href="/search/cs?searchtype=author&query=Hwang%2C+Y">Yechan Hwang</a>, <a href="/search/cs?searchtype=author&query=Choi%2C+H">Ho-Jin Choi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.04751v1-abstract-short" style="display: inline;"> Recently, large language and vision models (LLVMs) have received significant attention and development efforts due to their remarkable generalization performance across a wide range of tasks requiring perception and cognitive abilities. A key factor behind their success is their simple architecture, which consists of a vision encoder, a projector, and a large language model (LLM). 
Despite their ac… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04751v1-abstract-full').style.display = 'inline'; document.getElementById('2410.04751v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.04751v1-abstract-full" style="display: none;"> Recently, large language and vision models (LLVMs) have received significant attention and development efforts due to their remarkable generalization performance across a wide range of tasks requiring perception and cognitive abilities. A key factor behind their success is their simple architecture, which consists of a vision encoder, a projector, and a large language model (LLM). Despite their achievements in advanced reasoning tasks, their performance on fundamental perception-related tasks (e.g., MMVP) remains surprisingly low. This discrepancy raises the question of how LLVMs truly perceive images and exploit the advantages of the vision encoder. To address this, we systematically investigate this question regarding several aspects: permutation invariance, robustness, math reasoning, alignment preserving and importance, by evaluating the most common LLVM's families (i.e., LLaVA) across 10 evaluation benchmarks. Our extensive experiments reveal several intriguing properties of current LLVMs: (1) they internally process the image in a global manner, even when the order of visual patch sequences is randomly permuted; (2) they are sometimes able to solve math problems without fully perceiving detailed numerical information; (3) the cross-modal alignment is overfitted to complex reasoning tasks, thereby, causing them to lose some of the original perceptual capabilities of their vision encoder; (4) the representation space in the lower layers (<25%) plays a crucial role in determining performance and enhancing visual understanding. 
Lastly, based on the above observations, we suggest potential future directions for building better LLVMs and constructing more challenging evaluation benchmarks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04751v1-abstract-full').style.display = 'none'; document.getElementById('2410.04751v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Code is available in https://github.com/passing2961/IP-LLVM</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.18426">arXiv:2409.18426</a> <span> [<a href="https://arxiv.org/pdf/2409.18426">pdf</a>, <a href="https://arxiv.org/format/2409.18426">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Analysis of PDEs">math.AP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Numerical Analysis">math.NA</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Dual Cone Gradient Descent for Training Physics-Informed Neural Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hwang%2C+Y">Youngsik Hwang</a>, <a 
href="/search/cs?searchtype=author&query=Lim%2C+D">Dong-Young Lim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.18426v1-abstract-short" style="display: inline;"> Physics-informed neural networks (PINNs) have emerged as a prominent approach for solving partial differential equations (PDEs) by minimizing a combined loss function that incorporates both boundary loss and PDE residual loss. Despite their remarkable empirical performance in various scientific computing tasks, PINNs often fail to generate reasonable solutions, and such pathological behaviors rema… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.18426v1-abstract-full').style.display = 'inline'; document.getElementById('2409.18426v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.18426v1-abstract-full" style="display: none;"> Physics-informed neural networks (PINNs) have emerged as a prominent approach for solving partial differential equations (PDEs) by minimizing a combined loss function that incorporates both boundary loss and PDE residual loss. Despite their remarkable empirical performance in various scientific computing tasks, PINNs often fail to generate reasonable solutions, and such pathological behaviors remain difficult to explain and resolve. In this paper, we identify that PINNs can be adversely trained when gradients of each loss function exhibit a significant imbalance in their magnitudes and present a negative inner product value. To address these issues, we propose a novel optimization framework, Dual Cone Gradient Descent (DCGD), which adjusts the direction of the updated gradient to ensure it falls within a dual cone region. 
This region is defined as a set of vectors where the inner products with both the gradients of the PDE residual loss and the boundary loss are non-negative. Theoretically, we analyze the convergence properties of DCGD algorithms in a non-convex setting. On a variety of benchmark equations, we demonstrate that DCGD outperforms other optimization algorithms in terms of various evaluation metrics. In particular, DCGD achieves superior predictive accuracy and enhances the stability of training for failure modes of PINNs and complex PDEs, compared to existing optimally tuned models. Moreover, DCGD can be further improved by combining it with popular strategies for PINNs, including learning rate annealing and the Neural Tangent Kernel (NTK). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.18426v1-abstract-full').style.display = 'none'; document.getElementById('2409.18426v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.16497">arXiv:2409.16497</a> <span> [<a href="https://arxiv.org/pdf/2409.16497">pdf</a>, <a href="https://arxiv.org/format/2409.16497">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Unsupervised Text Representation Learning via Instruction-Tuning for Zero-Shot Dense Retrieval </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zeng%2C+Q">Qiuhai Zeng</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+Z">Zimeng Qiu</a>, <a href="/search/cs?searchtype=author&query=Hwang%2C+D+Y">Dae Yon Hwang</a>, <a href="/search/cs?searchtype=author&query=He%2C+X">Xin He</a>, <a href="/search/cs?searchtype=author&query=Campbell%2C+W+M">William M. Campbell</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.16497v1-abstract-short" style="display: inline;"> Dense retrieval systems are commonly used for information retrieval (IR). They rely on learning text representations through an encoder and usually require supervised modeling via labelled data which can be costly to obtain or simply unavailable. 
In this study, we introduce a novel unsupervised text representation learning technique via instruction-tuning the pre-trained encoder-decoder large lang… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.16497v1-abstract-full').style.display = 'inline'; document.getElementById('2409.16497v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.16497v1-abstract-full" style="display: none;"> Dense retrieval systems are commonly used for information retrieval (IR). They rely on learning text representations through an encoder and usually require supervised modeling via labelled data which can be costly to obtain or simply unavailable. In this study, we introduce a novel unsupervised text representation learning technique via instruction-tuning the pre-trained encoder-decoder large language models (LLM) under the dual-encoder retrieval framework. We demonstrate the corpus representation can be augmented by the representations of relevant synthetic queries generated by the instruct-tuned LLM founded on the Rao-Blackwell theorem. Furthermore, we effectively align the query and corpus text representation with self-instructed-tuning. Specifically, we first prompt an open-box pre-trained LLM to follow defined instructions (i.e. question generation and keyword summarization) to generate synthetic queries. Next, we fine-tune the pre-trained LLM with defined instructions and the generated queries that passed quality check. Finally, we generate synthetic queries with the instruction-tuned LLM for each corpora and represent each corpora by weighted averaging the synthetic queries and original corpora embeddings. We evaluate our proposed method under low-resource settings on three English and one German retrieval datasets measuring NDCG@10, MRR@100, Recall@100. 
We significantly improve the average zero-shot retrieval performance on all metrics, increasing open-box FLAN-T5 model variations by [3.34%, 3.50%] in absolute and exceeding three competitive dense retrievers (i.e. mDPR, T-Systems, mBART-Large), with model of size at least 38% smaller, by 1.96%, 4.62%, 9.52% absolute on NDCG@10. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.16497v1-abstract-full').style.display = 'none'; document.getElementById('2409.16497v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at DCAI24 workshop@CIKM2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.12211">arXiv:2408.12211</a> <span> [<a href="https://arxiv.org/pdf/2408.12211">pdf</a>, <a href="https://arxiv.org/format/2408.12211">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Computer-Aided Fall Recognition Using a Three-Stream Spatial-Temporal GCN Model with Adaptive Feature Aggregation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shin%2C+J">Jungpil Shin</a>, <a href="/search/cs?searchtype=author&query=Miah%2C+A+S+M">Abu Saleh Musa Miah</a>, <a href="/search/cs?searchtype=author&query=Egawa1%2C+R">Rei Egawa</a>, <a href="/search/cs?searchtype=author&query=Hirooka%2C+K">Koki Hirooka</a>, <a 
href="/search/cs?searchtype=author&query=Hasan%2C+M+A+M">Md. Al Mehedi Hasan</a>, <a href="/search/cs?searchtype=author&query=Tomioka%2C+Y">Yoichi Tomioka</a>, <a href="/search/cs?searchtype=author&query=Hwang%2C+Y+S">Yong Seok Hwang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.12211v1-abstract-short" style="display: inline;"> The prevention of falls is paramount in modern healthcare, particularly for the elderly, as falls can lead to severe injuries or even fatalities. Additionally, the growing incidence of falls among the elderly, coupled with the urgent need to prevent suicide attempts resulting from medication overdose, underscores the critical importance of accurate and efficient fall detection methods. In this sce… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.12211v1-abstract-full').style.display = 'inline'; document.getElementById('2408.12211v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.12211v1-abstract-full" style="display: none;"> The prevention of falls is paramount in modern healthcare, particularly for the elderly, as falls can lead to severe injuries or even fatalities. Additionally, the growing incidence of falls among the elderly, coupled with the urgent need to prevent suicide attempts resulting from medication overdose, underscores the critical importance of accurate and efficient fall detection methods. In this scenario, a computer-aided fall detection system is inevitable to save elderly people's lives worldwide. Many researchers have been working to develop fall detection systems. 
However, the existing fall detection systems often struggle with issues such as unsatisfactory performance accuracy, limited robustness, high computational complexity, and sensitivity to environmental factors due to a lack of effective features. In response to these challenges, this paper proposes a novel three-stream spatial-temporal feature-based fall detection system. Our system incorporates joint skeleton-based spatial and temporal Graph Convolutional Network (GCN) features, joint motion-based spatial and temporal GCN features, and residual connections-based features. Each stream employs adaptive graph-based feature aggregation and consecutive separable convolutional neural networks (Sep-TCN), significantly reducing computational complexity and model parameters compared to prior systems. Experimental results across multiple datasets demonstrate the superior effectiveness and efficiency of our proposed system, with accuracies of 99.51\%, 99.15\%, 99.79\% and 99.85 \% achieved on the ImViA, UR-Fall, Fall-UP and FU-Kinect datasets, respectively. The remarkable performance of our system highlights its superiority, efficiency, and generalizability in real-world fall detection scenarios, offering significant advancements in healthcare and societal well-being. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.12211v1-abstract-full').style.display = 'none'; document.getElementById('2408.12211v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.13942">arXiv:2407.13942</a> <span> [<a href="https://arxiv.org/pdf/2407.13942">pdf</a>, <a href="https://arxiv.org/format/2407.13942">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> </div> </div> <p class="title is-5 mathjax"> Harmful Suicide Content Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Park%2C+K">Kyumin Park</a>, <a href="/search/cs?searchtype=author&query=Baik%2C+M+J">Myung Jae Baik</a>, <a href="/search/cs?searchtype=author&query=Hwang%2C+Y">YeongJun Hwang</a>, <a href="/search/cs?searchtype=author&query=Shin%2C+Y">Yen Shin</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+H">HoJae Lee</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+R">Ruda Lee</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+S+M">Sang Min Lee</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+J+Y+H">Je Young Hannah Sun</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+A+R">Ah Rah Lee</a>, <a href="/search/cs?searchtype=author&query=Yoon%2C+S+Y">Si Yeun Yoon</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+D">Dong-ho Lee</a>, <a href="/search/cs?searchtype=author&query=Moon%2C+J">Jihyung Moon</a>, <a href="/search/cs?searchtype=author&query=Bak%2C+J">JinYeong Bak</a>, <a href="/search/cs?searchtype=author&query=Cho%2C+K">Kyunghyun Cho</a>, <a 
href="/search/cs?searchtype=author&query=Paik%2C+J">Jong-Woo Paik</a>, <a href="/search/cs?searchtype=author&query=Park%2C+S">Sungjoon Park</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.13942v1-abstract-short" style="display: inline;"> Harmful suicide content on the Internet is a significant risk factor inducing suicidal thoughts and behaviors among vulnerable populations. Despite global efforts, existing resources are insufficient, specifically in high-risk regions like the Republic of Korea. Current research mainly focuses on understanding negative effects of such content or suicide risk in individuals, rather than on automati… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.13942v1-abstract-full').style.display = 'inline'; document.getElementById('2407.13942v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.13942v1-abstract-full" style="display: none;"> Harmful suicide content on the Internet is a significant risk factor inducing suicidal thoughts and behaviors among vulnerable populations. Despite global efforts, existing resources are insufficient, specifically in high-risk regions like the Republic of Korea. Current research mainly focuses on understanding negative effects of such content or suicide risk in individuals, rather than on automatically detecting the harmfulness of content. To fill this gap, we introduce a harmful suicide content detection task for classifying online suicide content into five harmfulness levels. We develop a multi-modal benchmark and a task description document in collaboration with medical professionals, and leverage large language models (LLMs) to explore efficient methods for moderating such content. 
Our contributions include proposing a novel detection task, a multi-modal Korean benchmark with expert annotations, and suggesting strategies using LLMs to detect illegal and harmful content. Owing to the potential harm involved, we publicize our implementations and benchmark, incorporating an ethical verification process. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.13942v1-abstract-full').style.display = 'none'; document.getElementById('2407.13942v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">30 pages, 7 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.13751">arXiv:2407.13751</a> <span> [<a href="https://arxiv.org/pdf/2407.13751">pdf</a>, <a href="https://arxiv.org/format/2407.13751">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computational Finance">q-fin.CP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Temporal Representation Learning for Stock Similarities and Its Applications in Investment Management </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hwang%2C+Y">Yoontae Hwang</a>, <a href="/search/cs?searchtype=author&query=Zohren%2C+S">Stefan Zohren</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+Y">Yongjae Lee</a> </p> <p class="abstract mathjax"> <span 
class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.13751v1-abstract-short" style="display: inline;"> In the era of rapid globalization and digitalization, accurate identification of similar stocks has become increasingly challenging due to the non-stationary nature of financial markets and the ambiguity in conventional regional and sector classifications. To address these challenges, we examine SimStock, a novel temporal self-supervised learning framework that combines techniques from self-superv… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.13751v1-abstract-full').style.display = 'inline'; document.getElementById('2407.13751v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.13751v1-abstract-full" style="display: none;"> In the era of rapid globalization and digitalization, accurate identification of similar stocks has become increasingly challenging due to the non-stationary nature of financial markets and the ambiguity in conventional regional and sector classifications. To address these challenges, we examine SimStock, a novel temporal self-supervised learning framework that combines techniques from self-supervised learning (SSL) and temporal domain generalization to learn robust and informative representations of financial time series data. The primary focus of our study is to understand the similarities between stocks from a broader perspective, considering the complex dynamics of the global financial landscape. We conduct extensive experiments on four real-world datasets with thousands of stocks and demonstrate the effectiveness of SimStock in finding similar stocks, outperforming existing methods. 
The practical utility of SimStock is showcased through its application to various investment strategies, such as pairs trading, index tracking, and portfolio optimization, where it leads to superior performance compared to conventional methods. Our findings empirically examine the potential of data-driven approach to enhance investment decision-making and risk management practices by leveraging the power of temporal self-supervised learning in the face of the ever-changing global financial landscape. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.13751v1-abstract-full').style.display = 'none'; document.getElementById('2407.13751v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.04249">arXiv:2407.04249</a> <span> [<a href="https://arxiv.org/pdf/2407.04249">pdf</a>, <a href="https://arxiv.org/format/2407.04249">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> FeatureSORT: Essential Features for Effective Tracking </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hashempoor%2C+H">Hamidreza Hashempoor</a>, <a href="/search/cs?searchtype=author&query=Koikara%2C+R">Rosemary Koikara</a>, <a href="/search/cs?searchtype=author&query=Hwang%2C+Y+D">Yu Dong Hwang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" 
id="2407.04249v1-abstract-short" style="display: inline;"> In this work, we introduce a novel tracker designed for online multiple object tracking with a focus on being simple, while being effective. We provide multiple feature modules each of which stands for a particular appearance information. By integrating distinct appearance features, including clothing color, style, and target direction, alongside a ReID network for robust embedding extraction, our… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.04249v1-abstract-full').style.display = 'inline'; document.getElementById('2407.04249v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.04249v1-abstract-full" style="display: none;"> In this work, we introduce a novel tracker designed for online multiple object tracking with a focus on being simple, while being effective. We provide multiple feature modules each of which stands for a particular appearance information. By integrating distinct appearance features, including clothing color, style, and target direction, alongside a ReID network for robust embedding extraction, our tracker significantly enhances online tracking accuracy. Additionally, we propose the incorporation of a stronger detector and also provide advanced post-processing methods that further elevate the tracker's performance. During real time operation, we establish measurement to track associated distance function which includes the IoU, direction, color, style, and ReID features similarity information, where each metric is calculated separately. With the design of our feature related distance function, it is possible to track objects through longer period of occlusions, while keeping the number of identity switches comparatively low. 
Extensive experimental evaluation demonstrates notable improvement in tracking accuracy and reliability, as evidenced by reduced identity switches and enhanced occlusion handling. These advancements not only contribute to the state of the art in object tracking but also open new avenues for future research and practical applications demanding high precision and reliability. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.04249v1-abstract-full').style.display = 'none'; document.getElementById('2407.04249v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.01833">arXiv:2406.01833</a> <span> [<a href="https://arxiv.org/pdf/2406.01833">pdf</a>, <a href="https://arxiv.org/format/2406.01833">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3637528.3671724">10.1145/3637528.3671724 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> CAFO: Feature-Centric Explanation on Time Series Classification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kim%2C+J">Jaeho Kim</a>, <a 
href="/search/cs?searchtype=author&query=Hahn%2C+S">Seok-Ju Hahn</a>, <a href="/search/cs?searchtype=author&query=Hwang%2C+Y">Yoontae Hwang</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+J">Junghye Lee</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+S">Seulki Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.01833v2-abstract-short" style="display: inline;"> In multivariate time series (MTS) classification, finding the important features (e.g., sensors) for model performance is crucial yet challenging due to the complex, high-dimensional nature of MTS data, intricate temporal dynamics, and the necessity for domain-specific interpretations. Current explanation methods for MTS mostly focus on time-centric explanations, apt for pinpointing important time… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.01833v2-abstract-full').style.display = 'inline'; document.getElementById('2406.01833v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.01833v2-abstract-full" style="display: none;"> In multivariate time series (MTS) classification, finding the important features (e.g., sensors) for model performance is crucial yet challenging due to the complex, high-dimensional nature of MTS data, intricate temporal dynamics, and the necessity for domain-specific interpretations. Current explanation methods for MTS mostly focus on time-centric explanations, apt for pinpointing important time periods but less effective in identifying key features. This limitation underscores the pressing need for a feature-centric approach, a vital yet often overlooked perspective that complements time-centric analysis. 
To bridge this gap, our study introduces a novel feature-centric explanation and evaluation framework for MTS, named CAFO (Channel Attention and Feature Orthogonalization). CAFO employs a convolution-based approach with channel attention mechanisms, incorporating a depth-wise separable channel attention module (DepCA) and a QR decomposition-based loss for promoting feature-wise orthogonality. We demonstrate that this orthogonalization enhances the separability of attention distributions, thereby refining and stabilizing the ranking of feature importance. This improvement in feature-wise ranking enhances our understanding of feature explainability in MTS. Furthermore, we develop metrics to evaluate global and class-specific feature importance. Our framework's efficacy is validated through extensive empirical analyses on two major public benchmarks and real-world datasets, both synthetic and self-collected, specifically designed to highlight class-wise discriminative features. The results confirm CAFO's robustness and informative capacity in assessing feature importance in MTS classification tasks. This study not only advances the understanding of feature-centric explanations in MTS but also sets a foundation for future explorations in feature-centric explanations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.01833v2-abstract-full').style.display = 'none'; document.getElementById('2406.01833v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to KDD 2024 Research Track</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.00841">arXiv:2406.00841</a> <span> [<a href="https://arxiv.org/pdf/2406.00841">pdf</a>, <a href="https://arxiv.org/format/2406.00841">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3643834.3660721">10.1145/3643834.3660721 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Understanding On-the-Fly End-User Robot Programming </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Stegner%2C+L">Laura Stegner</a>, <a href="/search/cs?searchtype=author&query=Hwang%2C+Y">Yuna Hwang</a>, <a href="/search/cs?searchtype=author&query=Porfirio%2C+D">David Porfirio</a>, <a href="/search/cs?searchtype=author&query=Mutlu%2C+B">Bilge Mutlu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.00841v1-abstract-short" style="display: inline;"> Novel end-user programming (EUP) tools enable on-the-fly (i.e., spontaneous, easy, and rapid) creation of interactions with robotic systems. These tools are expected to empower users in determining system behavior, although very little is understood about how end users perceive, experience, and use these systems. 
In this paper, we seek to address this gap by investigating end-user experience with… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.00841v1-abstract-full').style.display = 'inline'; document.getElementById('2406.00841v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.00841v1-abstract-full" style="display: none;"> Novel end-user programming (EUP) tools enable on-the-fly (i.e., spontaneous, easy, and rapid) creation of interactions with robotic systems. These tools are expected to empower users in determining system behavior, although very little is understood about how end users perceive, experience, and use these systems. In this paper, we seek to address this gap by investigating end-user experience with on-the-fly robot EUP. We trained 21 end users to use an existing on-the-fly EUP tool, asked them to create robot interactions for four scenarios, and assessed their overall experience. Our findings provide insight into how these systems should be designed to better support end-user experience with on-the-fly EUP, focusing on user interaction with an automatic program synthesizer that resolves imprecise user input, the use of multimodal inputs to express user intent, and the general process of programming a robot. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.00841v1-abstract-full').style.display = 'none'; document.getElementById('2406.00841v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear at DIS'24. 
Stegner and Hwang contributed equally to this research</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.20867">arXiv:2405.20867</a> <span> [<a href="https://arxiv.org/pdf/2405.20867">pdf</a>, <a href="https://arxiv.org/format/2405.20867">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Complexity">cs.CC</span> </div> </div> <p class="title is-5 mathjax"> Automatic Channel Pruning for Multi-Head Attention </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lee%2C+E">Eunho Lee</a>, <a href="/search/cs?searchtype=author&query=Hwang%2C+Y">Youngbae Hwang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.20867v1-abstract-short" style="display: inline;"> Despite the strong performance of Transformers, their quadratic computation complexity presents challenges in applying them to vision tasks. Automatic pruning is one of effective methods for reducing computation complexity without heuristic approaches. However, directly applying it to multi-head attention is not straightforward due to channel misalignment. 
In this paper, we propose an automatic ch… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.20867v1-abstract-full').style.display = 'inline'; document.getElementById('2405.20867v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.20867v1-abstract-full" style="display: none;"> Despite the strong performance of Transformers, their quadratic computation complexity presents challenges in applying them to vision tasks. Automatic pruning is one of effective methods for reducing computation complexity without heuristic approaches. However, directly applying it to multi-head attention is not straightforward due to channel misalignment. In this paper, we propose an automatic channel pruning method to take into account the multi-head attention mechanism. First, we incorporate channel similarity-based weights into the pruning indicator to preserve more informative channels in each head. Then, we adjust pruning indicator to enforce removal of channels in equal proportions across all heads, preventing the channel misalignment. We also add a reweight module to compensate for information loss resulting from channel removal, and an effective initialization step for pruning indicator based on difference of attention between original structure and each channel. Our proposed method can be used to not only original attention, but also linear attention, which is more efficient as linear complexity with respect to the number of tokens. On ImageNet-1K, applying our pruning method to the FLattenTransformer, which includes both attention mechanisms, shows outperformed accuracy for several MACs compared with previous state-of-the-art efficient models and pruned methods. Code will be available soon. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.20867v1-abstract-full').style.display = 'none'; document.getElementById('2405.20867v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.01842">arXiv:2404.01842</a> <span> [<a href="https://arxiv.org/pdf/2404.01842">pdf</a>, <a href="https://arxiv.org/format/2404.01842">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Semi-Supervised Domain Adaptation for Wildfire Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jang%2C+J">JooYoung Jang</a>, <a href="/search/cs?searchtype=author&query=Cha%2C+Y">Youngseo Cha</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+J">Jisu Kim</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+S">SooHyung Lee</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+G">Geonu Lee</a>, <a href="/search/cs?searchtype=author&query=Cho%2C+M">Minkook Cho</a>, <a href="/search/cs?searchtype=author&query=Hwang%2C+Y">Young Hwang</a>, <a href="/search/cs?searchtype=author&query=Kwak%2C+N">Nojun Kwak</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.01842v1-abstract-short" style="display: inline;"> Recently, both the frequency and intensity of wildfires have increased worldwide, primarily due to climate change. 
In this paper, we propose a novel protocol for wildfire detection, leveraging semi-supervised Domain Adaptation for object detection, accompanied by a corresponding dataset designed for use by both academics and industries. Our dataset encompasses 30 times more diverse labeled scenes… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.01842v1-abstract-full').style.display = 'inline'; document.getElementById('2404.01842v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.01842v1-abstract-full" style="display: none;"> Recently, both the frequency and intensity of wildfires have increased worldwide, primarily due to climate change. In this paper, we propose a novel protocol for wildfire detection, leveraging semi-supervised Domain Adaptation for object detection, accompanied by a corresponding dataset designed for use by both academics and industries. Our dataset encompasses 30 times more diverse labeled scenes for the current largest benchmark wildfire dataset, HPWREN, and introduces a new labeling policy for wildfire detection. Inspired by CoordConv, we propose a robust baseline, Location-Aware Object Detection for Semi-Supervised Domain Adaptation (LADA), utilizing a teacher-student based framework capable of extracting translational variance features characteristic of wildfires. With only using 1% target domain labeled data, our framework significantly outperforms our source-only baseline by a notable margin of 3.8% in mean Average Precision on the HPWREN wildfire dataset. Our dataset is available at https://github.com/BloomBerry/LADA. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.01842v1-abstract-full').style.display = 'none'; document.getElementById('2404.01842v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages, 5 figures, 22 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.05814">arXiv:2403.05814</a> <span> [<a href="https://arxiv.org/pdf/2403.05814">pdf</a>, <a href="https://arxiv.org/format/2403.05814">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> MP2D: An Automated Topic Shift Dialogue Generation Framework Leveraging Knowledge Graphs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hwang%2C+Y">Yerin Hwang</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+Y">Yongil Kim</a>, <a href="/search/cs?searchtype=author&query=Jang%2C+Y">Yunah Jang</a>, <a href="/search/cs?searchtype=author&query=Bang%2C+J">Jeesoo Bang</a>, <a href="/search/cs?searchtype=author&query=Bae%2C+H">Hyunkyung Bae</a>, <a href="/search/cs?searchtype=author&query=Jung%2C+K">Kyomin Jung</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" 
id="2403.05814v1-abstract-short" style="display: inline;"> Despite advancements in on-topic dialogue systems, effectively managing topic shifts within dialogues remains a persistent challenge, largely attributed to the limited availability of training datasets. To address this issue, we propose Multi-Passage to Dialogue (MP2D), a data generation framework that automatically creates conversational question-answering datasets with natural topic transitions.… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.05814v1-abstract-full').style.display = 'inline'; document.getElementById('2403.05814v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.05814v1-abstract-full" style="display: none;"> Despite advancements in on-topic dialogue systems, effectively managing topic shifts within dialogues remains a persistent challenge, largely attributed to the limited availability of training datasets. To address this issue, we propose Multi-Passage to Dialogue (MP2D), a data generation framework that automatically creates conversational question-answering datasets with natural topic transitions. By leveraging the relationships between entities in a knowledge graph, MP2D maps the flow of topics within a dialogue, effectively mirroring the dynamics of human conversation. It retrieves relevant passages corresponding to the topics and transforms them into dialogues through the passage-to-dialogue method. Through quantitative and qualitative experiments, we demonstrate MP2D's efficacy in generating dialogue with natural topic shifts. Furthermore, this study introduces a novel benchmark for topic shift dialogues, TS-WikiDialog. 
Utilizing the dataset, we demonstrate that even Large Language Models (LLMs) struggle to handle topic shifts in dialogue effectively, and we showcase the performance improvements of models trained on datasets generated by MP2D across diverse topic shift dialogue tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.05814v1-abstract-full').style.display = 'none'; document.getElementById('2403.05814v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">20 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.09770">arXiv:2401.09770</a> <span> [<a href="https://arxiv.org/pdf/2401.09770">pdf</a>, <a href="https://arxiv.org/format/2401.09770">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computational Geometry">cs.CG</span> </div> </div> <p class="title is-5 mathjax"> Reliability-based G1 Continuous Arc Spline Approximation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jeon%2C+J">Jinhwan Jeon</a>, <a href="/search/cs?searchtype=author&query=Hwang%2C+Y">Yoonjin Hwang</a>, <a href="/search/cs?searchtype=author&query=Choi%2C+S+B">Seibum B. 
Choi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.09770v1-abstract-short" style="display: inline;"> In this paper, we present an algorithm to approximate a set of data points with G1 continuous arcs, using points' covariance data. To the best of our knowledge, previous arc spline approximation approaches assumed that all data points contribute equally (i.e. have the same weights) during the approximation process. However, this assumption may cause serious instability in the algorithm, if the col… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.09770v1-abstract-full').style.display = 'inline'; document.getElementById('2401.09770v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.09770v1-abstract-full" style="display: none;"> In this paper, we present an algorithm to approximate a set of data points with G1 continuous arcs, using points' covariance data. To the best of our knowledge, previous arc spline approximation approaches assumed that all data points contribute equally (i.e. have the same weights) during the approximation process. However, this assumption may cause serious instability in the algorithm, if the collected data contains outliers. To resolve this issue, a robust method for arc spline approximation is suggested in this work, assuming that the 2D covariance for each data point is given. Starting with the definition of models and parameters for single arc approximation, the framework is extended to multiple-arc approximation for general usage. Then the proposed algorithm is verified using generated noisy data and real-world collected data via vehicle experiment in Sejong City, South Korea. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.09770v1-abstract-full').style.display = 'none'; document.getElementById('2401.09770v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">42 pages, 19 figures, Submitted to Computer Aided Geometric Design</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.07589">arXiv:2311.07589</a> <span> [<a href="https://arxiv.org/pdf/2311.07589">pdf</a>, <a href="https://arxiv.org/format/2311.07589">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Dialogizer: Context-aware Conversational-QA Dataset Generation from Textual Sources </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hwang%2C+Y">Yerin Hwang</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+Y">Yongil Kim</a>, <a href="/search/cs?searchtype=author&query=Bae%2C+H">Hyunkyung Bae</a>, <a href="/search/cs?searchtype=author&query=Bang%2C+J">Jeesoo Bang</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+H">Hwanhee Lee</a>, <a href="/search/cs?searchtype=author&query=Jung%2C+K">Kyomin Jung</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short 
has-text-grey-dark mathjax" id="2311.07589v1-abstract-short" style="display: inline;"> To address the data scarcity issue in Conversational question answering (ConvQA), a dialog inpainting method, which utilizes documents to generate ConvQA datasets, has been proposed. However, the original dialog inpainting model is trained solely on the dialog reconstruction task, resulting in the generation of questions with low contextual relevance due to insufficient learning of question-answer… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.07589v1-abstract-full').style.display = 'inline'; document.getElementById('2311.07589v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.07589v1-abstract-full" style="display: none;"> To address the data scarcity issue in Conversational question answering (ConvQA), a dialog inpainting method, which utilizes documents to generate ConvQA datasets, has been proposed. However, the original dialog inpainting model is trained solely on the dialog reconstruction task, resulting in the generation of questions with low contextual relevance due to insufficient learning of question-answer alignment. To overcome this limitation, we propose a novel framework called Dialogizer, which has the capability to automatically generate ConvQA datasets with high contextual relevance from textual sources. The framework incorporates two training tasks: question-answer matching (QAM) and topic-aware dialog generation (TDG). Moreover, re-ranking is conducted during the inference phase based on the contextual relevance of the generated questions. Using our framework, we produce four ConvQA datasets by utilizing documents from multiple domains as the primary source. 
Through automatic evaluation using diverse metrics, as well as human evaluation, we validate that our proposed framework exhibits the ability to generate datasets of higher quality compared to the baseline dialog inpainting model. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.07589v1-abstract-full').style.display = 'none'; document.getElementById('2311.07589v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to EMNLP 2023 main conference</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.05373">arXiv:2311.05373</a> <span> [<a href="https://arxiv.org/pdf/2311.05373">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> What is prompt literacy? 
An exploratory study of language learners' development of new literacy skill using generative AI </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hwang%2C+Y">Yohan Hwang</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+J+H">Jang Ho Lee</a>, <a href="/search/cs?searchtype=author&query=Shin%2C+D">Dongkwang Shin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.05373v1-abstract-short" style="display: inline;"> In the current study, we propose that, in the era of generative AI, there is now a new form of literacy called "prompt literacy," which refers to the ability to generate precise prompts as input for AI systems, interpret the outputs, and iteratively refine prompts to achieve desired results. To explore the emergence and development of this literacy skill, the current study examined 30 EFL students'… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.05373v1-abstract-full').style.display = 'inline'; document.getElementById('2311.05373v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.05373v1-abstract-full" style="display: none;"> In the current study, we propose that, in the era of generative AI, there is now a new form of literacy called "prompt literacy," which refers to the ability to generate precise prompts as input for AI systems, interpret the outputs, and iteratively refine prompts to achieve desired results. To explore the emergence and development of this literacy skill, the current study examined 30 EFL students' engagement in an AI-powered image creation project, through which they created artworks representing the socio-cultural meanings of English words by iteratively drafting and refining prompts in generative AI tools. 
By examining AI-generated images and the participants' drafting and revision of their prompts, this study demonstrated the emergence of learners' prompt literacy skills. The survey data further showed the participants' perceived improvement in their vocabulary learning strategies as a result of engaging in the target AI-powered project. In addition, the participants' post-project reflection revealed three benefits of developing prompt literacy: enjoyment from manifesting imagined outcomes; recognition of its importance for communication, problem-solving and career development; and the enhanced understanding of the collaborative nature of human-AI interaction. These findings suggest that prompt literacy is an increasingly crucial literacy for the AI era. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.05373v1-abstract-full').style.display = 'none'; document.getElementById('2311.05373v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">22 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.00737">arXiv:2311.00737</a> <span> [<a href="https://arxiv.org/pdf/2311.00737">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Instrumentation and Detectors">physics.ins-det</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Medical Physics">physics.med-ph</span> </div> </div> <p class="title is-5 mathjax"> Real-Time Magnetic Tracking and Diagnosis of COVID-19 via Machine Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Nguyen%2C+D">Dang Nguyen</a>, <a href="/search/cs?searchtype=author&query=Huynh%2C+P+K">Phat K. Huynh</a>, <a href="/search/cs?searchtype=author&query=Bui%2C+V+D+A">Vinh Duc An Bui</a>, <a href="/search/cs?searchtype=author&query=Hwang%2C+K+Y">Kee Young Hwang</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+N">Nityanand Jain</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+C">Chau Nguyen</a>, <a href="/search/cs?searchtype=author&query=Minh%2C+L+H+N">Le Huu Nhat Minh</a>, <a href="/search/cs?searchtype=author&query=Van+Truong%2C+L">Le Van Truong</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+X+T">Xuan Thanh Nguyen</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+D+H">Dinh Hoang Nguyen</a>, <a href="/search/cs?searchtype=author&query=Dung%2C+L+T">Le Tien Dung</a>, <a href="/search/cs?searchtype=author&query=Le%2C+T+Q">Trung Q. 
Le</a>, <a href="/search/cs?searchtype=author&query=Phan%2C+M">Manh-Huong Phan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.00737v1-abstract-short" style="display: inline;"> The COVID-19 pandemic underscored the importance of reliable, noninvasive diagnostic tools for robust public health interventions. In this work, we fused magnetic respiratory sensing technology (MRST) with machine learning (ML) to create a diagnostic platform for real-time tracking and diagnosis of COVID-19 and other respiratory diseases. The MRST precisely captures breathing patterns through thre… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.00737v1-abstract-full').style.display = 'inline'; document.getElementById('2311.00737v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.00737v1-abstract-full" style="display: none;"> The COVID-19 pandemic underscored the importance of reliable, noninvasive diagnostic tools for robust public health interventions. In this work, we fused magnetic respiratory sensing technology (MRST) with machine learning (ML) to create a diagnostic platform for real-time tracking and diagnosis of COVID-19 and other respiratory diseases. The MRST precisely captures breathing patterns through three specific breath testing protocols: normal breath, holding breath, and deep breath. We collected breath data from both COVID-19 patients and healthy subjects in Vietnam using this platform, which then served to train and validate ML models. Our evaluation encompassed multiple ML algorithms, including support vector machines and deep learning models, assessing their ability to diagnose COVID-19. 
Our multi-model validation methodology ensures a thorough comparison and grants the adaptability to select the most optimal model, striking a balance between diagnostic precision with model interpretability. The findings highlight the exceptional potential of our diagnostic tool in pinpointing respiratory anomalies, achieving over 90% accuracy. This innovative sensor technology can be seamlessly integrated into healthcare settings for patient monitoring, marking a significant enhancement for the healthcare infrastructure. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.00737v1-abstract-full').style.display = 'none'; document.getElementById('2311.00737v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.10571">arXiv:2308.10571</a> <span> [<a href="https://arxiv.org/pdf/2308.10571">pdf</a>, <a href="https://arxiv.org/format/2308.10571">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Overcoming Overconfidence for Active Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hwang%2C+Y">Yujin Hwang</a>, <a href="/search/cs?searchtype=author&query=Jo%2C+W">Won Jo</a>, <a href="/search/cs?searchtype=author&query=Hong%2C+J">Juyoung Hong</a>, <a href="/search/cs?searchtype=author&query=Choi%2C+Y">Yukyung Choi</a> </p> <p class="abstract 
mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.10571v1-abstract-short" style="display: inline;"> It is not an exaggeration to say that the recent progress in artificial intelligence technology depends on large-scale and high-quality data. Simultaneously, a prevalent issue exists everywhere: the budget for data labeling is constrained. Active learning is a prominent approach for addressing this issue, where valuable data for labeling is selected through a model and utilized to iteratively adju… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.10571v1-abstract-full').style.display = 'inline'; document.getElementById('2308.10571v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.10571v1-abstract-full" style="display: none;"> It is not an exaggeration to say that the recent progress in artificial intelligence technology depends on large-scale and high-quality data. Simultaneously, a prevalent issue exists everywhere: the budget for data labeling is constrained. Active learning is a prominent approach for addressing this issue, where valuable data for labeling is selected through a model and utilized to iteratively adjust the model. However, due to the limited amount of data in each iteration, the model is vulnerable to bias; thus, it is more likely to yield overconfident predictions. In this paper, we present two novel methods to address the problem of overconfidence that arises in the active learning scenario. The first is an augmentation strategy named Cross-Mix-and-Mix (CMaM), which aims to calibrate the model by expanding the limited training distribution. The second is a selection strategy named Ranked Margin Sampling (RankedMS), which prevents choosing data that leads to overly confident predictions. 
Through various experiments and analyses, we are able to demonstrate that our proposals facilitate efficient data selection by alleviating overconfidence, even though they are readily applicable. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.10571v1-abstract-full').style.display = 'none'; document.getElementById('2308.10571v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.10166">arXiv:2308.10166</a> <span> [<a href="https://arxiv.org/pdf/2308.10166">pdf</a>, <a href="https://arxiv.org/format/2308.10166">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Cell Spatial Analysis in Crohn's Disease: Unveiling Local Cell Arrangement Pattern with Graph-based Signatures </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Bao%2C+S">Shunxing Bao</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+S">Sichen Zhu</a>, <a href="/search/cs?searchtype=author&query=Kolachala%2C+V+L">Vasantha L Kolachala</a>, <a href="/search/cs?searchtype=author&query=Remedios%2C+L+W">Lucas W. 
Remedios</a>, <a href="/search/cs?searchtype=author&query=Hwang%2C+Y">Yeonjoo Hwang</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+Y">Yutong Sun</a>, <a href="/search/cs?searchtype=author&query=Deng%2C+R">Ruining Deng</a>, <a href="/search/cs?searchtype=author&query=Cui%2C+C">Can Cui</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yike Li</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Jia Li</a>, <a href="/search/cs?searchtype=author&query=Roland%2C+J+T">Joseph T. Roland</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Q">Qi Liu</a>, <a href="/search/cs?searchtype=author&query=Lau%2C+K+S">Ken S. Lau</a>, <a href="/search/cs?searchtype=author&query=Kugathasan%2C+S">Subra Kugathasan</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+P">Peng Qiu</a>, <a href="/search/cs?searchtype=author&query=Wilson%2C+K+T">Keith T. Wilson</a>, <a href="/search/cs?searchtype=author&query=Coburn%2C+L+A">Lori A. Coburn</a>, <a href="/search/cs?searchtype=author&query=Landman%2C+B+A">Bennett A. Landman</a>, <a href="/search/cs?searchtype=author&query=Huo%2C+Y">Yuankai Huo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.10166v1-abstract-short" style="display: inline;"> Crohn's disease (CD) is a chronic and relapsing inflammatory condition that affects segments of the gastrointestinal tract. CD activity is determined by histological findings, particularly the density of neutrophils observed on Hematoxylin and Eosin stains (H&E) imaging. 
However, understanding the broader morphometry and local cell arrangement beyond cell counting and tissue morphology remains cha… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.10166v1-abstract-full').style.display = 'inline'; document.getElementById('2308.10166v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.10166v1-abstract-full" style="display: none;"> Crohn's disease (CD) is a chronic and relapsing inflammatory condition that affects segments of the gastrointestinal tract. CD activity is determined by histological findings, particularly the density of neutrophils observed on Hematoxylin and Eosin stains (H&E) imaging. However, understanding the broader morphometry and local cell arrangement beyond cell counting and tissue morphology remains challenging. To address this, we characterize six distinct cell types from H&E images and develop a novel approach for the local spatial signature of each cell. Specifically, we create a 10-cell neighborhood matrix, representing neighboring cell arrangements for each individual cell. Utilizing t-SNE for non-linear spatial projection in scatter-plot and Kernel Density Estimation contour-plot formats, our study examines patterns of differences in the cellular environment associated with the odds ratio of spatial patterns between active CD and control groups. This analysis is based on data collected at the two research institutes. The findings reveal heterogeneous nearest-neighbor patterns, signifying distinct tendencies of cell clustering, with a particular focus on the rectum region. These variations underscore the impact of data heterogeneity on cell spatial arrangements in CD patients. Moreover, the spatial distribution disparities between the two research sites highlight the significance of collaborative efforts among healthcare organizations. 
All research analysis pipeline tools are available at https://github.com/MASILab/cellNN. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.10166v1-abstract-full').style.display = 'none'; document.getElementById('2308.10166v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Submitted to SPIE Medical Imaging. San Diego, CA. February 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2303.14711">arXiv:2303.14711</a> <span> [<a href="https://arxiv.org/pdf/2303.14711">pdf</a>, <a href="https://arxiv.org/format/2303.14711">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Unsupervised detection of small hyperreflective features in ultrahigh resolution optical coherence tomography </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Reimann%2C+M">Marcel Reimann</a>, <a href="/search/cs?searchtype=author&query=Won%2C+J">Jungeun Won</a>, <a href="/search/cs?searchtype=author&query=Takahashi%2C+H">Hiroyuki Takahashi</a>, <a href="/search/cs?searchtype=author&query=Yaghy%2C+A">Antonio Yaghy</a>, <a href="/search/cs?searchtype=author&query=Hwang%2C+Y">Yunchan Hwang</a>, <a 
href="/search/cs?searchtype=author&query=Ploner%2C+S">Stefan Ploner</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+J">Junhong Lin</a>, <a href="/search/cs?searchtype=author&query=Girgis%2C+J">Jessica Girgis</a>, <a href="/search/cs?searchtype=author&query=Lam%2C+K">Kenneth Lam</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+S">Siyu Chen</a>, <a href="/search/cs?searchtype=author&query=Waheed%2C+N+K">Nadia K. Waheed</a>, <a href="/search/cs?searchtype=author&query=Maier%2C+A">Andreas Maier</a>, <a href="/search/cs?searchtype=author&query=Fujimoto%2C+J+G">James G. Fujimoto</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2303.14711v1-abstract-short" style="display: inline;"> Recent advances in optical coherence tomography such as the development of high speed ultrahigh resolution scanners and corresponding signal processing techniques may reveal new potential biomarkers in retinal diseases. Newly visible features are, for example, small hyperreflective specks in age-related macular degeneration. Identifying these new markers is crucial to investigate potential associa… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.14711v1-abstract-full').style.display = 'inline'; document.getElementById('2303.14711v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2303.14711v1-abstract-full" style="display: none;"> Recent advances in optical coherence tomography such as the development of high speed ultrahigh resolution scanners and corresponding signal processing techniques may reveal new potential biomarkers in retinal diseases. Newly visible features are, for example, small hyperreflective specks in age-related macular degeneration. 
Identifying these new markers is crucial to investigate potential association with disease progression and treatment outcomes. Therefore, it is necessary to reliably detect these features in 3D volumetric scans. Because manual labeling of entire volumes is infeasible a need for automatic detection arises. Labeled datasets are often not publicly available and there are usually large variations in scan protocols and scanner types. Thus, this work focuses on an unsupervised approach that is based on local peak-detection and random walker segmentation to detect small features on each B-scan of the volume. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.14711v1-abstract-full').style.display = 'none'; document.getElementById('2303.14711v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted as poster at BVM workshop 2023 (https://www.bvm-workshop.org/). The arXiv version provides full quality figures. 
6 pages content (2 figures)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2303.08389">arXiv:2303.08389</a> <span> [<a href="https://arxiv.org/pdf/2303.08389">pdf</a>, <a href="https://arxiv.org/format/2303.08389">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> PR-MCS: Perturbation Robust Metric for MultiLingual Image Captioning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kim%2C+Y">Yongil Kim</a>, <a href="/search/cs?searchtype=author&query=Hwang%2C+Y">Yerin Hwang</a>, <a href="/search/cs?searchtype=author&query=Yun%2C+H">Hyeongu Yun</a>, <a href="/search/cs?searchtype=author&query=Yoon%2C+S">Seunghyun Yoon</a>, <a href="/search/cs?searchtype=author&query=Bui%2C+T">Trung Bui</a>, <a href="/search/cs?searchtype=author&query=Jung%2C+K">Kyomin Jung</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2303.08389v1-abstract-short" style="display: inline;"> Vulnerability to lexical perturbation is a critical weakness of automatic evaluation metrics for image captioning. This paper proposes Perturbation Robust Multi-Lingual CLIPScore(PR-MCS), which exhibits robustness to such perturbations, as a novel reference-free image captioning metric applicable to multiple languages. 
To achieve perturbation robustness, we fine-tune the text encoder of CLIP with… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.08389v1-abstract-full').style.display = 'inline'; document.getElementById('2303.08389v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2303.08389v1-abstract-full" style="display: none;"> Vulnerability to lexical perturbation is a critical weakness of automatic evaluation metrics for image captioning. This paper proposes Perturbation Robust Multi-Lingual CLIPScore(PR-MCS), which exhibits robustness to such perturbations, as a novel reference-free image captioning metric applicable to multiple languages. To achieve perturbation robustness, we fine-tune the text encoder of CLIP with our language-agnostic method to distinguish the perturbed text from the original text. To verify the robustness of PR-MCS, we introduce a new fine-grained evaluation dataset consisting of detailed captions, critical objects, and the relationships between the objects for 3, 000 images in five languages. In our experiments, PR-MCS significantly outperforms baseline metrics in capturing lexical noise of all various perturbation types in all five languages, proving that PR-MCS is highly robust to lexical perturbations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.08389v1-abstract-full').style.display = 'none'; document.getElementById('2303.08389v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2303.08329">arXiv:2303.08329</a> <span> [<a href="https://arxiv.org/pdf/2303.08329">pdf</a>, <a href="https://arxiv.org/format/2303.08329">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Cross-speaker Emotion Transfer by Manipulating Speech Style Latents </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jo%2C+S">Suhee Jo</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+Y">Younggun Lee</a>, <a href="/search/cs?searchtype=author&query=Shin%2C+Y">Yookyung Shin</a>, <a href="/search/cs?searchtype=author&query=Hwang%2C+Y">Yeongtae Hwang</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+T">Taesu Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2303.08329v1-abstract-short" style="display: inline;"> In recent years, emotional text-to-speech has shown considerable progress. However, it requires a large amount of labeled data, which is not easily accessible. Even if it is possible to acquire an emotional speech dataset, there is still a limitation in controlling emotion intensity. 
In this work, we propose a novel method for cross-speaker emotion transfer and manipulation using vector arithmetic… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.08329v1-abstract-full').style.display = 'inline'; document.getElementById('2303.08329v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2303.08329v1-abstract-full" style="display: none;"> In recent years, emotional text-to-speech has shown considerable progress. However, it requires a large amount of labeled data, which is not easily accessible. Even if it is possible to acquire an emotional speech dataset, there is still a limitation in controlling emotion intensity. In this work, we propose a novel method for cross-speaker emotion transfer and manipulation using vector arithmetic in latent style space. By leveraging only a few labeled samples, we generate emotional speech from reading-style speech without losing the speaker identity. Furthermore, emotion strength is readily controllable using a scalar value, providing an intuitive way for users to manipulate speech. Experimental results show the proposed method affords superior performance in terms of expressiveness, naturalness, and controllability, preserving speaker identity. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.08329v1-abstract-full').style.display = 'none'; document.getElementById('2303.08329v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">accepted to ICASSP 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.01629">arXiv:2211.01629</a> <span> [<a href="https://arxiv.org/pdf/2211.01629">pdf</a>, <a href="https://arxiv.org/format/2211.01629">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Image-based Early Detection System for Wildfires </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ranadive%2C+O">Omkar Ranadive</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+J">Jisu Kim</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+S">Serin Lee</a>, <a href="/search/cs?searchtype=author&query=Cha%2C+Y">Youngseo Cha</a>, <a href="/search/cs?searchtype=author&query=Park%2C+H">Heechan Park</a>, <a href="/search/cs?searchtype=author&query=Cho%2C+M">Minkook Cho</a>, <a href="/search/cs?searchtype=author&query=Hwang%2C+Y+K">Young K. Hwang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.01629v1-abstract-short" style="display: inline;"> Wildfires are a disastrous phenomenon which cause damage to land, loss of property, air pollution, and even loss of human life. Due to the warmer and drier conditions created by climate change, more severe and uncontrollable wildfires are expected to occur in the coming years. 
This could lead to a global wildfire crisis and have dire consequences on our planet. Hence, it has become imperative to u… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.01629v1-abstract-full').style.display = 'inline'; document.getElementById('2211.01629v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.01629v1-abstract-full" style="display: none;"> Wildfires are a disastrous phenomenon which cause damage to land, loss of property, air pollution, and even loss of human life. Due to the warmer and drier conditions created by climate change, more severe and uncontrollable wildfires are expected to occur in the coming years. This could lead to a global wildfire crisis and have dire consequences on our planet. Hence, it has become imperative to use technology to help prevent the spread of wildfires. One way to prevent the spread of wildfires before they become too large is to perform early detection i.e, detecting the smoke before the actual fire starts. In this paper, we present our Wildfire Detection and Alert System which use machine learning to detect wildfire smoke with a high degree of accuracy and can send immediate alerts to users. Our technology is currently being used in the USA to monitor data coming in from hundreds of cameras daily. We show that our system has a high true detection rate and a low false detection rate. Our performance evaluation study also shows that on an average our system detects wildfire smoke faster than an actual person. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.01629v1-abstract-full').style.display = 'none'; document.getElementById('2211.01629v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published in Tackling Climate Change with Machine Learning workshop, Thirty-sixth Conference on Neural Information Processing Systems (NeurIPS 2022)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2210.11672">arXiv:2210.11672</a> <span> [<a href="https://arxiv.org/pdf/2210.11672">pdf</a>, <a href="https://arxiv.org/format/2210.11672">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> </div> </div> <p class="title is-5 mathjax"> Stochastic Adaptive Activation Function </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lee%2C+K">Kyungsu Lee</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+J">Jaeseung Yang</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+H">Haeyun Lee</a>, <a href="/search/cs?searchtype=author&query=Hwang%2C+J+Y">Jae Youn Hwang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span 
class="abstract-short has-text-grey-dark mathjax" id="2210.11672v1-abstract-short" style="display: inline;"> The simulation of human neurons and neurotransmission mechanisms has been realized in deep neural networks based on the theoretical implementations of activation functions. However, recent studies have reported that the threshold potential of neurons exhibits different values according to the locations and types of individual neurons, and that the activation functions have limitations in terms of… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.11672v1-abstract-full').style.display = 'inline'; document.getElementById('2210.11672v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2210.11672v1-abstract-full" style="display: none;"> The simulation of human neurons and neurotransmission mechanisms has been realized in deep neural networks based on the theoretical implementations of activation functions. However, recent studies have reported that the threshold potential of neurons exhibits different values according to the locations and types of individual neurons, and that the activation functions have limitations in terms of representing this variability. Therefore, this study proposes a simple yet effective activation function that facilitates different thresholds and adaptive activations according to the positions of units and the contexts of inputs. Furthermore, the proposed activation function mathematically exhibits a more generalized form of Swish activation function, and thus we denoted it as Adaptive SwisH (ASH). ASH highlights informative features that exhibit large values in the top percentiles in an input, whereas it rectifies low values. Most importantly, ASH exhibits trainable, adaptive, and context-aware properties compared to other activation functions. 
Furthermore, ASH represents general formula of the previously studied activation function and provides a reasonable mathematical background for the superior performance. To validate the effectiveness and robustness of ASH, we implemented ASH into many deep learning models for various tasks, including classification, detection, segmentation, and image generation. Experimental analysis demonstrates that our activation function can provide the benefits of more accurate prediction and earlier convergence in many deep learning applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.11672v1-abstract-full').style.display = 'none'; document.getElementById('2210.11672v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2209.09491">arXiv:2209.09491</a> <span> [<a href="https://arxiv.org/pdf/2209.09491">pdf</a>, <a href="https://arxiv.org/format/2209.09491">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Deep Q-Network for AI Soccer </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kim%2C+C">Curie Kim</a>, <a href="/search/cs?searchtype=author&query=Hwang%2C+Y">Yewon Hwang</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+J">Jong-Hwan Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2209.09491v2-abstract-short" 
style="display: inline;"> Reinforcement learning has shown an outstanding performance in the applications of games, particularly in Atari games as well as Go. Based on these successful examples, we attempt to apply one of the well-known reinforcement learning algorithms, Deep Q-Network, to the AI Soccer game. AI Soccer is a 5:5 robot soccer game where each participant develops an algorithm that controls five robots in a te… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.09491v2-abstract-full').style.display = 'inline'; document.getElementById('2209.09491v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2209.09491v2-abstract-full" style="display: none;"> Reinforcement learning has shown an outstanding performance in the applications of games, particularly in Atari games as well as Go. Based on these successful examples, we attempt to apply one of the well-known reinforcement learning algorithms, Deep Q-Network, to the AI Soccer game. AI Soccer is a 5:5 robot soccer game where each participant develops an algorithm that controls five robots in a team to defeat the opponent participant. Deep Q-Network is designed to implement our original rewards, the state space, and the action space to train each agent so that it can take proper actions in different situations during the game. Our algorithm was able to successfully train the agents, and its performance was preliminarily proven through the mini-competition against 10 teams wishing to take part in the AI Soccer international competition. The competition was organized by the AI World Cup committee, in conjunction with the WCG 2019 Xi'an AI Masters. With our algorithm, we got the achievement of advancing to the round of 16 in this international competition with 130 teams from 39 countries. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.09491v2-abstract-full').style.display = 'none'; document.getElementById('2209.09491v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 September, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 September, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2207.06000">arXiv:2207.06000</a> <span> [<a href="https://arxiv.org/pdf/2207.06000">pdf</a>, <a href="https://arxiv.org/format/2207.06000">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Text-driven Emotional Style Control and Cross-speaker Style Transfer in Neural TTS </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shin%2C+Y">Yookyung Shin</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+Y">Younggun Lee</a>, <a href="/search/cs?searchtype=author&query=Jo%2C+S">Suhee Jo</a>, <a href="/search/cs?searchtype=author&query=Hwang%2C+Y">Yeongtae Hwang</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+T">Taesu Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span 
class="abstract-short has-text-grey-dark mathjax" id="2207.06000v1-abstract-short" style="display: inline;"> Expressive text-to-speech has shown improved performance in recent years. However, the style control of synthetic speech is often restricted to discrete emotion categories and requires training data recorded by the target speaker in the target style. In many practical situations, users may not have reference speech recorded in target emotion but still be interested in controlling speech style just… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.06000v1-abstract-full').style.display = 'inline'; document.getElementById('2207.06000v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2207.06000v1-abstract-full" style="display: none;"> Expressive text-to-speech has shown improved performance in recent years. However, the style control of synthetic speech is often restricted to discrete emotion categories and requires training data recorded by the target speaker in the target style. In many practical situations, users may not have reference speech recorded in target emotion but still be interested in controlling speech style just by typing text description of desired emotional style. In this paper, we propose a text-based interface for emotional style control and cross-speaker style transfer in multi-speaker TTS. We propose the bi-modal style encoder which models the semantic relationship between text description embedding and speech style embedding with a pretrained language model. To further improve cross-speaker style transfer on disjoint, multi-style datasets, we propose the novel style loss. The experimental results show that our model can generate high-quality expressive speech even in unseen style. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.06000v1-abstract-full').style.display = 'none'; document.getElementById('2207.06000v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 July, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to Interspeech 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.09185">arXiv:2205.09185</a> <span> [<a href="https://arxiv.org/pdf/2205.09185">pdf</a>, <a href="https://arxiv.org/format/2205.09185">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Instrumentation and Detectors">physics.ins-det</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="High Energy Physics - Experiment">hep-ex</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Nuclear Experiment">nucl-ex</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Physics">physics.comp-ph</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1016/j.nima.2022.167748">10.1016/j.nima.2022.167748 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> AI-assisted Optimization of the ECCE Tracking System at the Electron 
Ion Collider </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Fanelli%2C+C">C. Fanelli</a>, <a href="/search/cs?searchtype=author&query=Papandreou%2C+Z">Z. Papandreou</a>, <a href="/search/cs?searchtype=author&query=Suresh%2C+K">K. Suresh</a>, <a href="/search/cs?searchtype=author&query=Adkins%2C+J+K">J. K. Adkins</a>, <a href="/search/cs?searchtype=author&query=Akiba%2C+Y">Y. Akiba</a>, <a href="/search/cs?searchtype=author&query=Albataineh%2C+A">A. Albataineh</a>, <a href="/search/cs?searchtype=author&query=Amaryan%2C+M">M. Amaryan</a>, <a href="/search/cs?searchtype=author&query=Arsene%2C+I+C">I. C. Arsene</a>, <a href="/search/cs?searchtype=author&query=Gayoso%2C+C+A">C. Ayerbe Gayoso</a>, <a href="/search/cs?searchtype=author&query=Bae%2C+J">J. Bae</a>, <a href="/search/cs?searchtype=author&query=Bai%2C+X">X. Bai</a>, <a href="/search/cs?searchtype=author&query=Baker%2C+M+D">M. D. Baker</a>, <a href="/search/cs?searchtype=author&query=Bashkanov%2C+M">M. Bashkanov</a>, <a href="/search/cs?searchtype=author&query=Bellwied%2C+R">R. Bellwied</a>, <a href="/search/cs?searchtype=author&query=Benmokhtar%2C+F">F. Benmokhtar</a>, <a href="/search/cs?searchtype=author&query=Berdnikov%2C+V">V. Berdnikov</a>, <a href="/search/cs?searchtype=author&query=Bernauer%2C+J+C">J. C. Bernauer</a>, <a href="/search/cs?searchtype=author&query=Bock%2C+F">F. Bock</a>, <a href="/search/cs?searchtype=author&query=Boeglin%2C+W">W. Boeglin</a>, <a href="/search/cs?searchtype=author&query=Borysova%2C+M">M. Borysova</a>, <a href="/search/cs?searchtype=author&query=Brash%2C+E">E. Brash</a>, <a href="/search/cs?searchtype=author&query=Brindza%2C+P">P. Brindza</a>, <a href="/search/cs?searchtype=author&query=Briscoe%2C+W+J">W. J. Briscoe</a>, <a href="/search/cs?searchtype=author&query=Brooks%2C+M">M. Brooks</a>, <a href="/search/cs?searchtype=author&query=Bueltmann%2C+S">S. Bueltmann</a> , et al. 
(258 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.09185v2-abstract-short" style="display: inline;"> The Electron-Ion Collider (EIC) is a cutting-edge accelerator facility that will study the nature of the "glue" that binds the building blocks of the visible matter in the universe. The proposed experiment will be realized at Brookhaven National Laboratory in approximately 10 years from now, with detector design and R&D currently ongoing. Notably, EIC is one of the first large-scale facilities to… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.09185v2-abstract-full').style.display = 'inline'; document.getElementById('2205.09185v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2205.09185v2-abstract-full" style="display: none;"> The Electron-Ion Collider (EIC) is a cutting-edge accelerator facility that will study the nature of the "glue" that binds the building blocks of the visible matter in the universe. The proposed experiment will be realized at Brookhaven National Laboratory in approximately 10 years from now, with detector design and R&D currently ongoing. Notably, EIC is one of the first large-scale facilities to leverage Artificial Intelligence (AI) already starting from the design and R&D phases. The EIC Comprehensive Chromodynamics Experiment (ECCE) is a consortium that proposed a detector design based on a 1.5T solenoid. The EIC detector proposal review concluded that the ECCE design will serve as the reference design for an EIC detector. Herein we describe a comprehensive optimization of the ECCE tracker using AI. The work required a complex parametrization of the simulated detector system. 
Our approach dealt with an optimization problem in a multidimensional design space driven by multiple objectives that encode the detector performance, while satisfying several mechanical constraints. We describe our strategy and show results obtained for the ECCE tracking system. The AI-assisted design is agnostic to the simulation framework and can be extended to other sub-detectors or to a system of sub-detectors to further optimize the performance of the EIC detector. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.09185v2-abstract-full').style.display = 'none'; document.getElementById('2205.09185v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2022. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages, 18 figures, 2 appendices, 3 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2204.02403">arXiv:2204.02403</a> <span> [<a href="https://arxiv.org/pdf/2204.02403">pdf</a>, <a href="https://arxiv.org/format/2204.02403">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Explainable Deep Learning Algorithm for Distinguishing Incomplete Kawasaki Disease by Coronary Artery Lesions on Echocardiographic Imaging </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lee%2C+H">Haeyun Lee</a>, <a href="/search/cs?searchtype=author&query=Eun%2C+Y">Yongsoon Eun</a>, <a href="/search/cs?searchtype=author&query=Hwang%2C+J+Y">Jae Youn Hwang</a>, <a href="/search/cs?searchtype=author&query=Eun%2C+L+Y">Lucy Youngmin Eun</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2204.02403v1-abstract-short" style="display: inline;"> Background and Objective: Incomplete Kawasaki disease (KD) has often been misdiagnosed due to a lack of the clinical manifestations of classic KD. However, it is associated with a markedly higher prevalence of coronary artery lesions. Identifying coronary artery lesions by echocardiography is important for the timely diagnosis of and favorable outcomes in KD. 
Moreover, similar to KD, coronavirus d… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.02403v1-abstract-full').style.display = 'inline'; document.getElementById('2204.02403v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2204.02403v1-abstract-full" style="display: none;"> Background and Objective: Incomplete Kawasaki disease (KD) has often been misdiagnosed due to a lack of the clinical manifestations of classic KD. However, it is associated with a markedly higher prevalence of coronary artery lesions. Identifying coronary artery lesions by echocardiography is important for the timely diagnosis of and favorable outcomes in KD. Moreover, similar to KD, coronavirus disease 2019, currently causing a worldwide pandemic, also manifests with fever; therefore, it is crucial at this moment that KD should be distinguished clearly among the febrile diseases in children. In this study, we aimed to validate a deep learning algorithm for classification of KD and other acute febrile diseases. Methods: We obtained coronary artery images by echocardiography of children (n = 88 for KD; n = 65 for pneumonia). We trained six deep learning networks (VGG19, Xception, ResNet50, ResNext50, SE-ResNet50, and SE-ResNext50) using the collected data. Results: SE-ResNext50 showed the best performance in terms of accuracy, specificity, and precision in the classification. SE-ResNext50 offered a precision of 76.35%, a sensitivity of 82.64%, and a specificity of 58.12%. Conclusions: The results of our study suggested that deep learning algorithms have similar performance to an experienced cardiologist in detecting coronary artery lesions to facilitate the diagnosis of KD. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.02403v1-abstract-full').style.display = 'none'; document.getElementById('2204.02403v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 April, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2202.09150">arXiv:2202.09150</a> <span> [<a href="https://arxiv.org/pdf/2202.09150">pdf</a>, <a href="https://arxiv.org/format/2202.09150">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Personalization Trade-offs in Designing a Dialogue-based Information System for Support-Seeking of Sexual Violence Survivors </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kim%2C+H">Hyeok Kim</a>, <a href="/search/cs?searchtype=author&query=Hwang%2C+Y">Youjin Hwang</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+J">Jieun Lee</a>, <a href="/search/cs?searchtype=author&query=Kwon%2C+Y">Youngjin Kwon</a>, <a href="/search/cs?searchtype=author&query=Park%2C+Y">Yujin Park</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+J">Joonhwan Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2202.09150v1-abstract-short" style="display: inline;"> The lack of reliable, personalized information often complicates sexual violence survivors' support-seeking. 
Recently, there is an emerging approach to conversational information systems for support-seeking of sexual violence survivors, featuring personalization with wide availability and anonymity. However, a single best solution might not exist as sexual violence survivors have different needs a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.09150v1-abstract-full').style.display = 'inline'; document.getElementById('2202.09150v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2202.09150v1-abstract-full" style="display: none;"> The lack of reliable, personalized information often complicates sexual violence survivors' support-seeking. Recently, there is an emerging approach to conversational information systems for support-seeking of sexual violence survivors, featuring personalization with wide availability and anonymity. However, a single best solution might not exist as sexual violence survivors have different needs and purposes in seeking support channels. To better envision conversational support-seeking systems for sexual violence survivors, we explore personalization trade-offs in designing such information systems. We implement a high-fidelity prototype dialogue-based information system through four design workshop sessions with three professional caregivers and interviewed with four self-identified survivors using our prototype. We then identify two forms of personalization trade-offs for conversational support-seeking systems: (1) specificity and sensitivity in understanding users and (2) relevancy and inclusiveness in providing information. To handle these trade-offs, we propose a reversed approach that starts from designing information and inclusive tailoring that considers unspecified needs, respectively. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.09150v1-abstract-full').style.display = 'none'; document.getElementById('2202.09150v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">15 pages, 2 figures, 1 table, accepted for CHI 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2202.01863">arXiv:2202.01863</a> <span> [<a href="https://arxiv.org/pdf/2202.01863">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Best Practices and Scoring System on Reviewing A.I. based Medical Imaging Papers: Part 1 Classification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kline%2C+T+L">Timothy L. Kline</a>, <a href="/search/cs?searchtype=author&query=Kitamura%2C+F">Felipe Kitamura</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+I">Ian Pan</a>, <a href="/search/cs?searchtype=author&query=Korchi%2C+A+M">Amine M. 
Korchi</a>, <a href="/search/cs?searchtype=author&query=Tenenholtz%2C+N">Neil Tenenholtz</a>, <a href="/search/cs?searchtype=author&query=Moy%2C+L">Linda Moy</a>, <a href="/search/cs?searchtype=author&query=Gichoya%2C+J+W">Judy Wawira Gichoya</a>, <a href="/search/cs?searchtype=author&query=Santos%2C+I">Igor Santos</a>, <a href="/search/cs?searchtype=author&query=Blumer%2C+S">Steven Blumer</a>, <a href="/search/cs?searchtype=author&query=Hwang%2C+M+Y">Misha Ysabel Hwang</a>, <a href="/search/cs?searchtype=author&query=Git%2C+K">Kim-Ann Git</a>, <a href="/search/cs?searchtype=author&query=Shroff%2C+A">Abishek Shroff</a>, <a href="/search/cs?searchtype=author&query=Walach%2C+E">Elad Walach</a>, <a href="/search/cs?searchtype=author&query=Shih%2C+G">George Shih</a>, <a href="/search/cs?searchtype=author&query=Langer%2C+S">Steve Langer</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2202.01863v1-abstract-short" style="display: inline;"> With the recent advances in A.I. methodologies and their application to medical imaging, there has been an explosion of related research programs utilizing these techniques to produce state-of-the-art classification performance. Ultimately, these research programs culminate in submission of their work for consideration in peer reviewed journals. To date, the criteria for acceptance vs. rejection i… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.01863v1-abstract-full').style.display = 'inline'; document.getElementById('2202.01863v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2202.01863v1-abstract-full" style="display: none;"> With the recent advances in A.I. 
methodologies and their application to medical imaging, there has been an explosion of related research programs utilizing these techniques to produce state-of-the-art classification performance. Ultimately, these research programs culminate in submission of their work for consideration in peer reviewed journals. To date, the criteria for acceptance vs. rejection is often subjective; however, reproducible science requires reproducible review. The Machine Learning Education Sub-Committee of SIIM has identified a knowledge gap and a serious need to establish guidelines for reviewing these studies. Although there have been several recent papers with this goal, this present work is written from the machine learning practitioners standpoint. In this series, the committee will address the best practices to be followed in an A.I.-based study and present the required sections in terms of examples and discussion of what should be included to make the studies cohesive, reproducible, accurate, and self-contained. This first entry in the series focuses on the task of image classification. Elements such as dataset curation, data pre-processing steps, defining an appropriate reference standard, data partitioning, model architecture and training are discussed. The sections are presented as they would be detailed in a typical manuscript, with content describing the necessary information that should be included to make sure the study is of sufficient quality to be considered for publication. The goal of this series is to provide resources to not only help improve the review process for A.I.-based medical imaging papers, but to facilitate a standard for the information that is presented within all components of the research study. We hope to provide quantitative metrics in what otherwise may be a qualitative review process. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.01863v1-abstract-full').style.display = 'none'; document.getElementById('2202.01863v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 February, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2202.00783">arXiv:2202.00783</a> <span> [<a href="https://arxiv.org/pdf/2202.00783">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> </div> </div> <p class="title is-5 mathjax"> Modeling ventilation in a low-income house in Dhaka, Bangladesh </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hwang%2C+Y">Yunjae Hwang</a>, <a href="/search/cs?searchtype=author&query=Laura"> Laura</a>, <a href="/search/cs?searchtype=author&query=Kwong"> Kwong</a>, <a href="/search/cs?searchtype=author&query=Munim%2C+M+S">Mohammad Saeed Munim</a>, <a href="/search/cs?searchtype=author&query=Nizame%2C+F+A">Fosiul Alam Nizame</a>, <a href="/search/cs?searchtype=author&query=Luby%2C+S">Stephen Luby</a>, <a href="/search/cs?searchtype=author&query=Gorl%C3%A9%2C+C">Catherine Gorlé</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2202.00783v1-abstract-short" style="display: inline;"> According to UNICEF, pneumonia is the leading cause of death in children under 5. 
70% of worldwide pneumonia deaths occur in only 15 countries, including Bangladesh. Previous research has indicated a potential association between the incidence of pneumonia and the presence of cross-ventilation in slum housing in Dhaka, Bangladesh. The objective of this research is to establish a validated computat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.00783v1-abstract-full').style.display = 'inline'; document.getElementById('2202.00783v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2202.00783v1-abstract-full" style="display: none;"> According to UNICEF, pneumonia is the leading cause of death in children under 5. 70% of worldwide pneumonia deaths occur in only 15 countries, including Bangladesh. Previous research has indicated a potential association between the incidence of pneumonia and the presence of cross-ventilation in slum housing in Dhaka, Bangladesh. The objective of this research is to establish a validated computational framework that can predict ventilation rates in slum homes to support further studies investigating this correlation. To achieve this objective we employ a building thermal model (BTM) in combination with uncertainty quantification (UQ). The BTM solves for the time-evolution of volume-averaged temperatures in a typical home, considering different ventilation configurations. The UQ method propagates uncertainty in model parameters, weather inputs, and physics models to predict mean values and 95% confidence intervals for the quantities of interest, namely temperatures and ventilation rates in terms of air changes per hour (ACH). The model predictions are compared to on-site field measurements of air and thermal mass temperatures, and of ACH. 
The results indicate that the use of standard cross- or single-sided ventilation models limits the accuracy of the ACH predictions; in contrast, a model based on a similarity relationship informed by the available ACH measurements can produce more accurate predictions with confidence intervals that encompass the measurements for 12 of the 17 available data points. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.00783v1-abstract-full').style.display = 'none'; document.getElementById('2202.00783v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 January, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2111.01254">arXiv:2111.01254</a> <span> [<a href="https://arxiv.org/pdf/2111.01254">pdf</a>, <a href="https://arxiv.org/ps/2111.01254">ps</a>, <a href="https://arxiv.org/format/2111.01254">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Complexity">cs.CC</span> </div> </div> <p class="title is-5 mathjax"> Unique Games hardness of Quantum Max-Cut, and a conjectured vector-valued Borell's inequality </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hwang%2C+Y">Yeongwoo Hwang</a>, <a href="/search/cs?searchtype=author&query=Neeman%2C+J">Joe Neeman</a>, <a href="/search/cs?searchtype=author&query=Parekh%2C+O">Ojas Parekh</a>, <a href="/search/cs?searchtype=author&query=Thompson%2C+K">Kevin Thompson</a>, <a href="/search/cs?searchtype=author&query=Wright%2C+J">John 
Wright</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2111.01254v3-abstract-short" style="display: inline;"> The Gaussian noise stability of a function $f:\mathbb{R}^n \to \{-1, 1\}$ is the expected value of $f(\boldsymbol{x}) \cdot f(\boldsymbol{y})$ over $ρ$-correlated Gaussian random variables $\boldsymbol{x}$ and $\boldsymbol{y}$. Borell's inequality states that for $-1 \leq ρ \leq 0$, this is minimized by the halfspace $f(x) = \mathrm{sign}(x_1)$. In this work, we generalize this result to hold for f… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2111.01254v3-abstract-full').style.display = 'inline'; document.getElementById('2111.01254v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2111.01254v3-abstract-full" style="display: none;"> The Gaussian noise stability of a function $f:\mathbb{R}^n \to \{-1, 1\}$ is the expected value of $f(\boldsymbol{x}) \cdot f(\boldsymbol{y})$ over $ρ$-correlated Gaussian random variables $\boldsymbol{x}$ and $\boldsymbol{y}$. Borell's inequality states that for $-1 \leq ρ \leq 0$, this is minimized by the halfspace $f(x) = \mathrm{sign}(x_1)$. In this work, we generalize this result to hold for functions $f:\mathbb{R}^n \to S^{k-1}$ which output $k$-dimensional unit vectors. Our main conjecture, which we call the $\textit{vector-valued Borell's inequality}$, asserts that the expected value of $\langle f(\boldsymbol{x}), f(\boldsymbol{y})\rangle$ is minimized by the function $f(x) = x_{\leq k} / \Vert x_{\leq k} \Vert$, where $x_{\leq k} = (x_1, \ldots, x_k)$. We give several pieces of evidence in favor of this conjecture, including a proof that it does indeed hold in the special case of $n = k$. 
As an application of this conjecture, we show that it implies several hardness of approximation results for a special case of the local Hamiltonian problem related to the anti-ferromagnetic Heisenberg model known as Quantum Max-Cut. This can be viewed as a natural quantum analogue of the classical Max-Cut problem and has been proposed as a useful testbed for developing algorithms. We show the following, assuming our conjecture: (1) The integrality gap of the basic SDP is $0.498$, matching an existing rounding algorithm. Combined with existing results, this shows that the basic SDP does not achieve the optimal approximation ratio. (2) It is Unique Games-hard (UG-hard) to compute a $(0.956+\varepsilon)$-approximation to the value of the best product state, matching an existing approximation algorithm. (3) It is UG-hard to compute a $(0.956+\varepsilon)$-approximation to the value of the best (possibly entangled) state. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2111.01254v3-abstract-full').style.display = 'none'; document.getElementById('2111.01254v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 September, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 November, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2021. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">76 pages; v3 treats the vector-valued Borell's inequality as a conjecture rather than a theorem, due to an error in previous versions</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2109.05712">arXiv:2109.05712</a> <span> [<a href="https://arxiv.org/pdf/2109.05712">pdf</a>, <a href="https://arxiv.org/format/2109.05712">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Contrastive Learning for Context-aware Neural Machine Translation Using Coreference Information </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hwang%2C+Y">Yongkeun Hwang</a>, <a href="/search/cs?searchtype=author&query=Yun%2C+H">Hyungu Yun</a>, <a href="/search/cs?searchtype=author&query=Jung%2C+K">Kyomin Jung</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2109.05712v1-abstract-short" style="display: inline;"> Context-aware neural machine translation (NMT) incorporates contextual information of surrounding texts, that can improve the translation quality of document-level machine translation. Many existing works on context-aware NMT have focused on developing new model architectures for incorporating additional contexts and have shown some promising results. 
However, most existing works rely on cross-ent… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.05712v1-abstract-full').style.display = 'inline'; document.getElementById('2109.05712v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2109.05712v1-abstract-full" style="display: none;"> Context-aware neural machine translation (NMT) incorporates contextual information of surrounding texts, that can improve the translation quality of document-level machine translation. Many existing works on context-aware NMT have focused on developing new model architectures for incorporating additional contexts and have shown some promising results. However, most existing works rely on cross-entropy loss, resulting in limited use of contextual information. In this paper, we propose CorefCL, a novel data augmentation and contrastive learning scheme based on coreference between the source and contextual sentences. By corrupting automatically detected coreference mentions in the contextual sentence, CorefCL can train the model to be sensitive to coreference inconsistency. We experimented with our method on common context-aware NMT models and two document-level translation tasks. In the experiments, our method consistently improved BLEU of compared models on English-German and English-Korean tasks. We also show that our method significantly improves coreference resolution in the English-German contrastive test suite. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.05712v1-abstract-full').style.display = 'none'; document.getElementById('2109.05712v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 September, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2021. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">WMT 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2109.01729">arXiv:2109.01729</a> <span> [<a href="https://arxiv.org/pdf/2109.01729">pdf</a>, <a href="https://arxiv.org/format/2109.01729">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Applying the Persona of User's Family Member and the Doctor to the Conversational Agents for Healthcare </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hwang%2C+Y">Youjin Hwang</a>, <a href="/search/cs?searchtype=author&query=Shin%2C+D">Donghoon Shin</a>, <a href="/search/cs?searchtype=author&query=Baek%2C+S">Sion Baek</a>, <a href="/search/cs?searchtype=author&query=Suh%2C+B">Bongwon Suh</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+J">Joonhwan Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2109.01729v1-abstract-short" style="display: inline;"> Conversational agents have been showing lots of opportunities in healthcare by taking over a lot of tasks that used to be done by a human. One of the major functions of conversational healthcare agent is intervening users' daily behaviors. In this case, forming an intimate and trustful relationship with users is one of the major issues. 
Factors affecting human-agent relationship should be deeply e… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.01729v1-abstract-full').style.display = 'inline'; document.getElementById('2109.01729v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2109.01729v1-abstract-full" style="display: none;"> Conversational agents have been showing lots of opportunities in healthcare by taking over a lot of tasks that used to be done by a human. One of the major functions of conversational healthcare agent is intervening users' daily behaviors. In this case, forming an intimate and trustful relationship with users is one of the major issues. Factors affecting human-agent relationship should be deeply explored to improve long-term acceptance of healthcare agent. Even though a bunch of ideas and researches have been suggested to increase the acceptance of conversational agents in healthcare, challenges still remain. From the preliminary work we conducted, we suggest an idea of applying the personas of users' family members and the doctor who are in the relationship with users in the real world as a solution for forming the rigid relationship between humans and the chatbot. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.01729v1-abstract-full').style.display = 'none'; document.getElementById('2109.01729v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 September, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2021. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at CHI 2020 Workshop on Conversational Agents for Health and Wellbeing</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2108.09030">arXiv:2108.09030</a> <span> [<a href="https://arxiv.org/pdf/2108.09030">pdf</a>, <a href="https://arxiv.org/format/2108.09030">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Type Anywhere You Want: An Introduction to Invisible Mobile Keyboard </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yoo%2C+S">Sahng-Min Yoo</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+U">Ue-Hwan Kim</a>, <a href="/search/cs?searchtype=author&query=Hwang%2C+Y">Yewon Hwang</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+J">Jong-Hwan Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2108.09030v1-abstract-short" style="display: inline;"> Contemporary soft keyboards possess limitations: the lack of physical feedback results in an increase of typos, and the interface of soft keyboards degrades the utility of the screen. To overcome these limitations, we propose an Invisible Mobile Keyboard (IMK), which lets users freely type on the desired area without any constraints. 
To facilitate a data-driven IMK decoding task, we have collected… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2108.09030v1-abstract-full').style.display = 'inline'; document.getElementById('2108.09030v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2108.09030v1-abstract-full" style="display: none;"> Contemporary soft keyboards possess limitations: the lack of physical feedback results in an increase of typos, and the interface of soft keyboards degrades the utility of the screen. To overcome these limitations, we propose an Invisible Mobile Keyboard (IMK), which lets users freely type on the desired area without any constraints. To facilitate a data-driven IMK decoding task, we have collected the most extensive text-entry dataset (approximately 2M pairs of typing positions and the corresponding characters). Additionally, we propose our baseline decoder along with a semantic typo correction mechanism based on self-attention, which decodes such unconstrained inputs with high accuracy (96.0%). Moreover, the user study reveals that the users could type faster and feel convenience and satisfaction to IMK with our decoder. Lastly, we make the source code and the dataset public to contribute to the research community. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2108.09030v1-abstract-full').style.display = 'none'; document.getElementById('2108.09030v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 August, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2021. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by IJCAI 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2104.09021">arXiv:2104.09021</a> <span> [<a href="https://arxiv.org/pdf/2104.09021">pdf</a>, <a href="https://arxiv.org/format/2104.09021">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Writing in The Air: Unconstrained Text Recognition from Finger Movement Using Spatio-Temporal Convolution </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kim%2C+U">Ue-Hwan Kim</a>, <a href="/search/cs?searchtype=author&query=Hwang%2C+Y">Yewon Hwang</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+S">Sun-Kyung Lee</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+J">Jong-Hwan Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2104.09021v1-abstract-short" style="display: inline;"> In this paper, we introduce a new benchmark dataset for the challenging writing in the air (WiTA) task -- an elaborate task bridging vision and NLP. WiTA implements an intuitive and natural writing method with finger movement for human-computer interaction (HCI). 
Our WiTA dataset will facilitate the development of data-driven WiTA systems which thus far have displayed unsatisfactory performance --… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2104.09021v1-abstract-full').style.display = 'inline'; document.getElementById('2104.09021v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2104.09021v1-abstract-full" style="display: none;"> In this paper, we introduce a new benchmark dataset for the challenging writing in the air (WiTA) task -- an elaborate task bridging vision and NLP. WiTA implements an intuitive and natural writing method with finger movement for human-computer interaction (HCI). Our WiTA dataset will facilitate the development of data-driven WiTA systems which thus far have displayed unsatisfactory performance -- due to lack of dataset as well as traditional statistical models they have adopted. Our dataset consists of five sub-datasets in two languages (Korean and English) and amounts to 209,926 video instances from 122 participants. We capture finger movement for WiTA with RGB cameras to ensure wide accessibility and cost-efficiency. Next, we propose spatio-temporal residual network architectures inspired by 3D ResNet. These models perform unconstrained text recognition from finger movement, guarantee a real-time operation by processing 435 and 697 decoding frames-per-second for Korean and English, respectively, and will serve as an evaluation standard. Our dataset and the source codes are available at https://github.com/Uehwan/WiTA. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2104.09021v1-abstract-full').style.display = 'none'; document.getElementById('2104.09021v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 April, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 6 figures, 6 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2004.11819">arXiv:2004.11819</a> <span> [<a href="https://arxiv.org/pdf/2004.11819">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TGRS.2020.3010055">10.1109/TGRS.2020.3010055 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Domain Adaptive Transfer Attack (DATA)-based Segmentation Networks for Building Extraction from Aerial Images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Na%2C+Y">Younghwan Na</a>, <a 
href="/search/cs?searchtype=author&query=Kim%2C+J+H">Jun Hee Kim</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+K">Kyungsu Lee</a>, <a href="/search/cs?searchtype=author&query=Park%2C+J">Juhum Park</a>, <a href="/search/cs?searchtype=author&query=Hwang%2C+J+Y">Jae Youn Hwang</a>, <a href="/search/cs?searchtype=author&query=Choi%2C+J+P">Jihwan P. Choi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2004.11819v2-abstract-short" style="display: inline;"> Semantic segmentation models based on convolutional neural networks (CNNs) have gained much attention in relation to remote sensing and have achieved remarkable performance for the extraction of buildings from high-resolution aerial images. However, the issue of limited generalization for unseen images remains. When there is a domain gap between the training and test datasets, CNN-based segmentati… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2004.11819v2-abstract-full').style.display = 'inline'; document.getElementById('2004.11819v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2004.11819v2-abstract-full" style="display: none;"> Semantic segmentation models based on convolutional neural networks (CNNs) have gained much attention in relation to remote sensing and have achieved remarkable performance for the extraction of buildings from high-resolution aerial images. However, the issue of limited generalization for unseen images remains. When there is a domain gap between the training and test datasets, CNN-based segmentation models trained by a training dataset fail to segment buildings for the test dataset. In this paper, we propose segmentation networks based on a domain adaptive transfer attack (DATA) scheme for building extraction from aerial images. 
The proposed system combines the domain transfer and adversarial attack concepts. Based on the DATA scheme, the distribution of the input images can be shifted to that of the target images while turning images into adversarial examples against a target network. Defending adversarial examples adapted to the target domain can overcome the performance degradation due to the domain gap and increase the robustness of the segmentation model. Cross-dataset experiments and the ablation study are conducted for the three different datasets: the Inria aerial image labeling dataset, the Massachusetts building dataset, and the WHU East Asia dataset. Compared to the performance of the segmentation network without the DATA scheme, the proposed method shows improvements in the overall IoU. Moreover, it is verified that the proposed method outperforms even when compared to feature adaptation (FA) and output space adaptation (OSA). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2004.11819v2-abstract-full').style.display = 'none'; document.getElementById('2004.11819v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 April, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 April, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2020. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 pages, 12 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2001.01401">arXiv:2001.01401</a> <span> [<a href="https://arxiv.org/pdf/2001.01401">pdf</a>, <a href="https://arxiv.org/format/2001.01401">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Mel-spectrogram augmentation for sequence to sequence voice conversion </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hwang%2C+Y">Yeongtae Hwang</a>, <a href="/search/cs?searchtype=author&query=Cho%2C+H">Hyemin Cho</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+H">Hongsun Yang</a>, <a href="/search/cs?searchtype=author&query=Won%2C+D">Dong-Ok Won</a>, <a href="/search/cs?searchtype=author&query=Oh%2C+I">Insoo Oh</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+S">Seong-Whan Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2001.01401v2-abstract-short" style="display: inline;"> For training the sequence-to-sequence voice conversion model, we need to handle an issue of insufficient data about the number of speech pairs which consist of the same utterance. This study experimentally investigated the effects of Mel-spectrogram augmentation on training the sequence-to-sequence voice conversion (VC) model from scratch. 
For Mel-spectrogram augmentation, we adopted the policies… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2001.01401v2-abstract-full').style.display = 'inline'; document.getElementById('2001.01401v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2001.01401v2-abstract-full" style="display: none;"> For training the sequence-to-sequence voice conversion model, we need to handle an issue of insufficient data about the number of speech pairs which consist of the same utterance. This study experimentally investigated the effects of Mel-spectrogram augmentation on training the sequence-to-sequence voice conversion (VC) model from scratch. For Mel-spectrogram augmentation, we adopted the policies proposed in SpecAugment. In addition, we proposed new policies (i.e., frequency warping, loudness and time length control) for more data variations. Moreover, to find the appropriate hyperparameters of augmentation policies without training the VC model, we proposed hyperparameter search strategy and the new metric for reducing experimental cost, namely deformation per deteriorating ratio. We compared the effect of these Mel-spectrogram augmentation methods based on various sizes of training set and augmentation policies. In the experimental results, the time axis warping based policies (i.e., time length control and time warping.) showed better performance than other policies. These results indicate that the use of the Mel-spectrogram augmentation is more beneficial for training the VC model. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2001.01401v2-abstract-full').style.display = 'none'; document.getElementById('2001.01401v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 June, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 January, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, 1 figure, 8 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1902.02905">arXiv:1902.02905</a> <span> [<a href="https://arxiv.org/pdf/1902.02905">pdf</a>, <a href="https://arxiv.org/format/1902.02905">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Medical Physics">physics.med-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neurons and Cognition">q-bio.NC</span> </div> </div> <p class="title is-5 mathjax"> Mobile Artificial Intelligence Technology for Detecting Macula Edema and Subretinal Fluid on OCT Scans: Initial Results from the DATUM alpha Study </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Odaibo%2C+S+G">Stephen G. 
Odaibo</a>, <a href="/search/cs?searchtype=author&query=MomPremier%2C+M">Mikelson MomPremier</a>, <a href="/search/cs?searchtype=author&query=Hwang%2C+R+Y">Richard Y. Hwang</a>, <a href="/search/cs?searchtype=author&query=Yousuf%2C+S+J">Salman J. Yousuf</a>, <a href="/search/cs?searchtype=author&query=Williams%2C+S+L">Steven L. Williams</a>, <a href="/search/cs?searchtype=author&query=Grant%2C+J">Joshua Grant</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1902.02905v2-abstract-short" style="display: inline;"> Artificial Intelligence (AI) is necessary to address the large and growing deficit in retina and healthcare access globally. And mobile AI diagnostic platforms running in the Cloud may effectively and efficiently distribute such AI capability. Here we sought to evaluate the feasibility of Cloud-based mobile artificial intelligence for detection of retinal disease. And to evaluate the accuracy of a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1902.02905v2-abstract-full').style.display = 'inline'; document.getElementById('1902.02905v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1902.02905v2-abstract-full" style="display: none;"> Artificial Intelligence (AI) is necessary to address the large and growing deficit in retina and healthcare access globally. And mobile AI diagnostic platforms running in the Cloud may effectively and efficiently distribute such AI capability. Here we sought to evaluate the feasibility of Cloud-based mobile artificial intelligence for detection of retinal disease. And to evaluate the accuracy of a particular such system for detection of subretinal fluid (SRF) and macula edema (ME) on OCT scans. 
A multicenter retrospective image analysis was conducted in which board-certified ophthalmologists with fellowship training in retina evaluated OCT images of the macula. They noted the presence or absence of ME or SRF, then compared their assessment to that obtained from Fluid Intelligence, a mobile AI app that detects SRF and ME on OCT scans. Investigators consecutively selected retinal OCTs, while making effort to balance the number of scans with retinal fluid and scans without. Exclusion criteria included poor scan quality, ambiguous features, macula holes, retinoschisis, and dense epiretinal membranes. Accuracy in the form of sensitivity and specificity of the AI mobile App was determined by comparing its assessments to those of the retina specialists. At the time of this submission, five centers have completed their initial studies. This consists of a total of 283 OCT scans of which 155 had either ME or SRF ("wet") and 128 did not ("dry"). The sensitivity ranged from 82.5% to 97% with a weighted average of 89.3%. The specificity ranged from 52% to 100% with a weighted average of 81.23%. CONCLUSION: Cloud-based Mobile AI technology is feasible for the detection retinal disease. In particular, Fluid Intelligence (alpha version), is sufficiently accurate as a screening tool for SRF and ME, especially in underserved areas. Further studies and technology development is needed. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1902.02905v2-abstract-full').style.display = 'none'; document.getElementById('1902.02905v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 February, 2019; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 February, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2019. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Initial results of the DATUM alpha Study were initially presented on August 13th 2018 in the Keynote Address at the 116th National Medical Association Annual Meeting &amp; Scientific Assembly's New Innovations in Ophthalmology Session. The results were also presented on September 21st 2018 in a Podium Lecture during Alumni Day at the University of Michigan–Ann Arbor Kellogg Eye Center</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1809.07998">arXiv:1809.07998</a> <span> [<a href="https://arxiv.org/pdf/1809.07998">pdf</a>, <a href="https://arxiv.org/ps/1809.07998">ps</a>, <a href="https://arxiv.org/format/1809.07998">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Emerging Technologies">cs.ET</span> </div> </div> <p class="title is-5 mathjax"> Hierarchical System Mapping for Large-Scale Fault-Tolerant Quantum Computing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hwang%2C+Y">Yongsoo Hwang</a>, <a href="/search/cs?searchtype=author&query=Choi%2C+B">Byung-Soo Choi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1809.07998v1-abstract-short" style="display: inline;"> Considering the large-scale quantum computer, it is important to know how much quantum computational resources is necessary precisely and quickly. 
Unfortunately the previous methods so far cannot practically support large-scale quantum computing, and therefore its analysis, because they usually use a non-structured code. To overcome this problem, we propose a fast mapping by using the hierarchical… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1809.07998v1-abstract-full').style.display = 'inline'; document.getElementById('1809.07998v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1809.07998v1-abstract-full" style="display: none;"> Considering the large-scale quantum computer, it is important to know how much quantum computational resources is necessary precisely and quickly. Unfortunately the previous methods so far cannot practically support large-scale quantum computing, and therefore its analysis, because they usually use a non-structured code. To overcome this problem, we propose a fast mapping by using the hierarchical assembly code which is much more compact than the non-structured code. During the mapping process, the necessary modules and their interconnection can be dynamically mapped by using the communication bus at the cost of additional qubits. In our study, the proposed method works very fast such as 1 hour rather than 1500 days for the Shor algorithm to factorize a 512-bit integer. Meanwhile, since the hierarchical assembly code has a high degree of locality, it has shorter SWAP chains and hence it does not increase the quantum computation time more than expected. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1809.07998v1-abstract-full').style.display = 'none'; document.getElementById('1809.07998v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 September, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2018. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1807.06233">arXiv:1807.06233</a> <span> [<a href="https://arxiv.org/pdf/1807.06233">pdf</a>, <a href="https://arxiv.org/format/1807.06233">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Robust Deep Multi-modal Learning Based on Gated Information Fusion Network </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kim%2C+J">Jaekyum Kim</a>, <a href="/search/cs?searchtype=author&query=Koh%2C+J">Junho Koh</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+Y">Yecheol Kim</a>, <a href="/search/cs?searchtype=author&query=Choi%2C+J">Jaehyung Choi</a>, <a href="/search/cs?searchtype=author&query=Hwang%2C+Y">Youngbae Hwang</a>, <a href="/search/cs?searchtype=author&query=Choi%2C+J+W">Jun Won Choi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1807.06233v2-abstract-short" style="display: inline;"> The goal of multi-modal learning is to use complementary information on the relevant task provided by the multiple modalities to achieve reliable and robust performance. 
Recently, deep learning has led to significant improvement in multi-modal learning by allowing for the information fusion in the intermediate feature levels. This paper addresses a problem of designing robust deep multi-modal learnin… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1807.06233v2-abstract-full').style.display = 'inline'; document.getElementById('1807.06233v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1807.06233v2-abstract-full" style="display: none;"> The goal of multi-modal learning is to use complementary information on the relevant task provided by the multiple modalities to achieve reliable and robust performance. Recently, deep learning has led to significant improvement in multi-modal learning by allowing for the information fusion in the intermediate feature levels. This paper addresses a problem of designing robust deep multi-modal learning architecture in the presence of imperfect modalities. We introduce deep fusion architecture for object detection which processes each modality using the separate convolutional neural network (CNN) and constructs the joint feature map by combining the intermediate features from the CNNs. In order to facilitate the robustness to the degraded modalities, we employ the gated information fusion (GIF) network which weights the contribution from each modality according to the input feature maps to be fused. The weights are determined through the convolutional layers followed by a sigmoid function and trained along with the information fusion network in an end-to-end fashion. Our experiments show that the proposed GIF network offers the additional architectural flexibility to achieve robust performance in handling some degraded modalities, and show a significant performance improvement based on Single Shot Detector (SSD) for KITTI dataset using the proposed fusion network and data augmentation schemes. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1807.06233v2-abstract-full').style.display = 'none'; document.getElementById('1807.06233v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 November, 2018; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 July, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2018. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">2018 Asian Conference on Computer Vision (ACCV)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1801.03009">arXiv:1801.03009</a> <span> [<a href="https://arxiv.org/pdf/1801.03009">pdf</a>, <a href="https://arxiv.org/format/1801.03009">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Atmospheric and Oceanic Physics">physics.ao-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Data Analysis, Statistics and Probability">physics.data-an</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1016/j.cma.2018.12.022">10.1016/j.cma.2018.12.022 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Development of hp-inverse model by using generalized polynomial chaos </p> <p class="authors"> <span 
class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yeo%2C+K">Kyongmin Yeo</a>, <a href="/search/cs?searchtype=author&query=Hwang%2C+Y">Youngdeok Hwang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xiao Liu</a>, <a href="/search/cs?searchtype=author&query=Kalagnanam%2C+J">Jayant Kalagnanam</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1801.03009v2-abstract-short" style="display: inline;"> We present a hp-inverse model to estimate a smooth, non-negative source function from a limited number of observations for a two-dimensional linear source inversion problem. A standard least-square inverse model is formulated by using a set of Gaussian radial basis functions (GRBF) on a rectangular mesh system with a uniform grid space. Here, the choice of the mesh system is modeled as a random va… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1801.03009v2-abstract-full').style.display = 'inline'; document.getElementById('1801.03009v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1801.03009v2-abstract-full" style="display: none;"> We present a hp-inverse model to estimate a smooth, non-negative source function from a limited number of observations for a two-dimensional linear source inversion problem. A standard least-square inverse model is formulated by using a set of Gaussian radial basis functions (GRBF) on a rectangular mesh system with a uniform grid space. Here, the choice of the mesh system is modeled as a random variable and the generalized polynomial chaos (gPC) expansion is used to represent the random mesh system. It is shown that the convolution of gPC and GRBF provides hierarchical basis functions for the linear source inverse model with the $hp$-refinement capability. 
We propose a mixed $l_1$ and $l_2$ regularization to exploit the hierarchical nature of the basis functions to find a sparse solution. The $hp$-inverse model has an advantage over the standard least-square inverse model when the number of data is limited. It is shown that the $hp$-inverse model provides a good estimate of the source function even when the number of unknown parameters ($m$) is much larger than the number of data ($n$), e.g., $m/n > 40$. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1801.03009v2-abstract-full').style.display = 'none'; document.getElementById('1801.03009v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 December, 2018; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 January, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2018. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1712.09721">arXiv:1712.09721</a> <span> [<a href="https://arxiv.org/pdf/1712.09721">pdf</a>, <a href="https://arxiv.org/ps/1712.09721">ps</a>, <a href="https://arxiv.org/format/1712.09721">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Functional Analysis">math.FA</span> </div> </div> <p class="title is-5 mathjax"> Analysis of the Game-Theoretic Modeling of Backscatter Wireless Sensor Networks under Smart Interference </p> <p class="authors"> <span class="search-hit">Authors:</span> <a 
href="/search/cs?searchtype=author&query=Hong%2C+S+G">Seung Gwan Hong</a>, <a href="/search/cs?searchtype=author&query=Hwang%2C+Y+M">Yu Min Hwang</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+S+Y">Sun Yui Lee</a>, <a href="/search/cs?searchtype=author&query=Shin%2C+Y">Yoan Shin</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+D+I">Dong In Kim</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+J+Y">Jin Young Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1712.09721v1-abstract-short" style="display: inline;"> In this paper, we study an interference avoidance scenario in the presence of a smart interferer which can rapidly observe the transmit power of a backscatter wireless sensor network (WSN) and effectively interrupt backscatter signals. We consider a power control with a sub-channel allocation to avoid interference attacks and a time-switching ratio for backscattering and RF energy harvesting in ba… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1712.09721v1-abstract-full').style.display = 'inline'; document.getElementById('1712.09721v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1712.09721v1-abstract-full" style="display: none;"> In this paper, we study an interference avoidance scenario in the presence of a smart interferer which can rapidly observe the transmit power of a backscatter wireless sensor network (WSN) and effectively interrupt backscatter signals. We consider a power control with a sub-channel allocation to avoid interference attacks and a time-switching ratio for backscattering and RF energy harvesting in backscatter WSNs. 
We formulate the problem based on a Stackelberg game theory and compute the optimal transmit power, time-switching ratio, and sub-channel allocation parameter to maximize a utility function against the smart interference. We propose two algorithms for the utility maximization using Lagrangian dual decomposition for the backscatter WSN and the smart interference to prove the existence of the Stackelberg equilibrium. Numerical results show that the proposed algorithms effectively maximize the utility, compared to that of the algorithm based on the Nash game, so as to overcome smart interference in backscatter communications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1712.09721v1-abstract-full').style.display = 'none'; document.getElementById('1712.09721v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 December, 2017; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2017. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1601.05447">arXiv:1601.05447</a> <span> [<a href="https://arxiv.org/pdf/1601.05447">pdf</a>, <a href="https://arxiv.org/format/1601.05447">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Detecting Temporally Consistent Objects in Videos through Object Class Label Propagation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tripathi%2C+S">Subarna Tripathi</a>, <a href="/search/cs?searchtype=author&query=Belongie%2C+S">Serge Belongie</a>, <a href="/search/cs?searchtype=author&query=Hwang%2C+Y">Youngbae Hwang</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+T">Truong Nguyen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1601.05447v1-abstract-short" style="display: inline;"> Object proposals for detecting moving or static video objects need to address issues such as speed, memory complexity and temporal consistency. We propose an efficient Video Object Proposal (VOP) generation method and show its efficacy in learning a better video object detector. 
A deep-learning based video object detector learned using the proposed VOP achieves state-of-the-art detection performan… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1601.05447v1-abstract-full').style.display = 'inline'; document.getElementById('1601.05447v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1601.05447v1-abstract-full" style="display: none;"> Object proposals for detecting moving or static video objects need to address issues such as speed, memory complexity and temporal consistency. We propose an efficient Video Object Proposal (VOP) generation method and show its efficacy in learning a better video object detector. A deep-learning based video object detector learned using the proposed VOP achieves state-of-the-art detection performance on the Youtube-Objects dataset. We further propose a clustering of VOPs which can efficiently be used for detecting objects in video in a streaming fashion. As opposed to applying per-frame convolutional neural network (CNN) based object detection, our proposed method called Objects in Video Enabler thRough LAbel Propagation (OVERLAP) needs to classify only a small fraction of all candidate proposals in every video frame through streaming clustering of object proposals and class-label propagation. Source code will be made available soon. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1601.05447v1-abstract-full').style.display = 'none'; document.getElementById('1601.05447v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 January, 2016; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2016. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted for publication in WACV 2016</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Hwang%2C+Y&start=50" class="pagination-next">Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Hwang%2C+Y&start=0" class="pagination-link is-current" aria-label="Goto page 1" aria-current="page">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Hwang%2C+Y&start=50" class="pagination-link" aria-label="Goto page 2">2 </a> </li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 
14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 
15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>