CINXE.COM

Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;50 of 567 results for author: <span class="mathjax">Kang, J</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&amp;query=Kang%2C+J">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Kang, J"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Kang%2C+J&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Kang, J"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Kang%2C+J&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Kang%2C+J&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Kang%2C+J&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Kang%2C+J&amp;start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Kang%2C+J&amp;start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Kang%2C+J&amp;start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">&hellip;</span></li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.12047">arXiv:2411.12047</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.12047">pdf</a>, <a href="https://arxiv.org/format/2411.12047">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Simultaneous Ground Reaction Force and State Estimation via Constrained Moving Horizon Estimation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Kang%2C+J">Jiarong Kang</a>, <a href="/search/cs?searchtype=author&amp;query=Xiong%2C+X">Xiaobin Xiong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.12047v1-abstract-short" style="display: inline;"> Accurate ground reaction force (GRF) estimation can significantly improve the adaptability of legged robots in various real-world applications. For instance, with estimated GRF and contact kinematics, the locomotion control and planning assist the robot in overcoming uncertain terrains. The canonical momentum-based methods, formulated as nonlinear observers, do not fully address the noisy measurem&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12047v1-abstract-full').style.display = 'inline'; document.getElementById('2411.12047v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.12047v1-abstract-full" style="display: none;"> Accurate ground reaction force (GRF) estimation can significantly improve the adaptability of legged robots in various real-world applications. For instance, with estimated GRF and contact kinematics, the locomotion control and planning assist the robot in overcoming uncertain terrains. The canonical momentum-based methods, formulated as nonlinear observers, do not fully address the noisy measurements and the dependence between floating base states and the generalized momentum dynamics. In this paper, we present a simultaneous ground reaction force and state estimation framework for legged robots, which systematically addresses the sensor noise and the coupling between states and dynamics. With the floating base orientation estimated separately, a decentralized Moving Horizon Estimation (MHE) method is implemented to fuse the robot dynamics, proprioceptive sensors, exteroceptive sensors, and deterministic contact complementarity constraints in a convex windowed optimization. The proposed method is shown to be capable of providing accurate GRF and state estimation on several legged robots, including the open-source educational planar bipedal robot STRIDE and quadrupedal robot Unitree Go1, with a frequency of 200Hz and a past time window of 0.04s. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12047v1-abstract-full').style.display = 'none'; document.getElementById('2411.12047v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.11302">arXiv:2411.11302</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.11302">pdf</a>, <a href="https://arxiv.org/format/2411.11302">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Towards Personalized Brain-Computer Interface Application Based on Endogenous EEG Paradigms </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Kwak%2C+H">Heon-Gyu Kwak</a>, <a href="/search/cs?searchtype=author&amp;query=Shin%2C+G">Gi-Hwan Shin</a>, <a href="/search/cs?searchtype=author&amp;query=Choi%2C+Y">Yeon-Woo Choi</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+D">Dong-Hoon Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Jeon%2C+Y">Yoo-In Jeon</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+J">Jun-Su Kang</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+S">Seong-Whan Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.11302v1-abstract-short" style="display: inline;"> In this paper, we propose a conceptual framework for personalized brain-computer interface (BCI) applications, which can offer an enhanced user experience by customizing services to individual preferences and needs, based on endogenous electroencephalography (EEG) paradigms including motor imagery (MI), speech imagery (SI), and visual imagery. The framework includes two essential components: user&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11302v1-abstract-full').style.display = 'inline'; document.getElementById('2411.11302v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.11302v1-abstract-full" style="display: none;"> In this paper, we propose a conceptual framework for personalized brain-computer interface (BCI) applications, which can offer an enhanced user experience by customizing services to individual preferences and needs, based on endogenous electroencephalography (EEG) paradigms including motor imagery (MI), speech imagery (SI), and visual imagery. The framework includes two essential components: user identification and intention classification, which enable personalized services by identifying individual users and recognizing their intended actions through EEG signals. We validate the feasibility of our framework using a private EEG dataset collected from eight subjects, employing the ShallowConvNet architecture to decode EEG features. The experimental results demonstrate that user identification achieved an average classification accuracy of 0.995, while intention classification achieved 0.47 accuracy across all paradigms, with MI demonstrating the best performance. These findings indicate that EEG signals can effectively support personalized BCI applications, offering robust identification and reliable intention decoding, especially for MI and SI. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11302v1-abstract-full').style.display = 'none'; document.getElementById('2411.11302v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Submissoion version for IEEE International BCI Winter Conference 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.10212">arXiv:2411.10212</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.10212">pdf</a>, <a href="https://arxiv.org/format/2411.10212">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Embedding Byzantine Fault Tolerance into Federated Learning via Virtual Data-Driven Consistency Scoring Plugin </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lee%2C+Y">Youngjoon Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Gong%2C+J">Jinu Gong</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+J">Joonhyuk Kang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.10212v1-abstract-short" style="display: inline;"> Given sufficient data from multiple edge devices, federated learning (FL) enables training a shared model without transmitting private data to a central server. However, FL is generally vulnerable to Byzantine attacks from compromised edge devices, which can significantly degrade the model performance. In this paper, we propose a intuitive plugin that can be integrated into existing FL techniques&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10212v1-abstract-full').style.display = 'inline'; document.getElementById('2411.10212v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.10212v1-abstract-full" style="display: none;"> Given sufficient data from multiple edge devices, federated learning (FL) enables training a shared model without transmitting private data to a central server. However, FL is generally vulnerable to Byzantine attacks from compromised edge devices, which can significantly degrade the model performance. In this paper, we propose a intuitive plugin that can be integrated into existing FL techniques to achieve Byzantine-Resilience. Key idea is to generate virtual data samples and evaluate model consistency scores across local updates to effectively filter out compromised edge devices. By utilizing this scoring mechanism before the aggregation phase, the proposed plugin enables existing FL techniques to become robust against Byzantine attacks while maintaining their original benefits. Numerical results on medical image classification task validate that plugging the proposed approach into representative FL algorithms, effectively achieves Byzantine resilience. Furthermore, the proposed plugin maintains the original convergence properties of the base FL algorithms when no Byzantine attacks are present. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10212v1-abstract-full').style.display = 'none'; document.getElementById('2411.10212v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">7 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.09148">arXiv:2411.09148</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.09148">pdf</a>, <a href="https://arxiv.org/format/2411.09148">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> Toward Democratized Generative AI in Next-Generation Mobile Edge Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+R">Ruichen Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=He%2C+J">Jiayi He</a>, <a href="/search/cs?searchtype=author&amp;query=Luo%2C+X">Xiaofeng Luo</a>, <a href="/search/cs?searchtype=author&amp;query=Niyato%2C+D">Dusit Niyato</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+J">Jiawen Kang</a>, <a href="/search/cs?searchtype=author&amp;query=Xiong%2C+Z">Zehui Xiong</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Y">Yonghui Li</a>, <a href="/search/cs?searchtype=author&amp;query=Sikdar%2C+B">Biplab Sikdar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.09148v1-abstract-short" style="display: inline;"> The rapid development of generative AI technologies, including large language models (LLMs), has brought transformative changes to various fields. However, deploying such advanced models on mobile and edge devices remains challenging due to their high computational, memory, communication, and energy requirements. To address these challenges, we propose a model-centric framework for democratizing g&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09148v1-abstract-full').style.display = 'inline'; document.getElementById('2411.09148v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.09148v1-abstract-full" style="display: none;"> The rapid development of generative AI technologies, including large language models (LLMs), has brought transformative changes to various fields. However, deploying such advanced models on mobile and edge devices remains challenging due to their high computational, memory, communication, and energy requirements. To address these challenges, we propose a model-centric framework for democratizing generative AI deployment on mobile and edge networks. First, we comprehensively review key compact model strategies, such as quantization, model pruning, and knowledge distillation, and present key performance metrics to optimize generative AI for mobile deployment. Next, we provide a focused review of mobile and edge networks, emphasizing the specific challenges and requirements of these environments. We further conduct a case study demonstrating the effectiveness of these strategies by deploying LLMs on real mobile edge devices. Experimental results highlight the practicality of democratized LLMs, with significant improvements in generalization accuracy, hallucination rate, accessibility, and resource consumption. Finally, we discuss potential research directions to further advance the deployment of generative AI in resource-constrained environments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09148v1-abstract-full').style.display = 'none'; document.getElementById('2411.09148v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages, 5 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.08341">arXiv:2411.08341</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.08341">pdf</a>, <a href="https://arxiv.org/format/2411.08341">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Generative AI for Data Augmentation in Wireless Networks: Analysis, Applications, and Case Study </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wen%2C+J">Jinbo Wen</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+J">Jiawen Kang</a>, <a href="/search/cs?searchtype=author&amp;query=Niyato%2C+D">Dusit Niyato</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Y">Yang Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+J">Jiacheng Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Sikdar%2C+B">Biplab Sikdar</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+P">Ping Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.08341v1-abstract-short" style="display: inline;"> Data augmentation is a powerful technique to mitigate data scarcity. However, owing to fundamental differences in wireless data structures, traditional data augmentation techniques may not be suitable for wireless data. Fortunately, Generative Artificial Intelligence (GenAI) can be an effective alternative to wireless data augmentation due to its excellent data generation capability. This article&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08341v1-abstract-full').style.display = 'inline'; document.getElementById('2411.08341v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.08341v1-abstract-full" style="display: none;"> Data augmentation is a powerful technique to mitigate data scarcity. However, owing to fundamental differences in wireless data structures, traditional data augmentation techniques may not be suitable for wireless data. Fortunately, Generative Artificial Intelligence (GenAI) can be an effective alternative to wireless data augmentation due to its excellent data generation capability. This article systemically explores the potential and effectiveness of GenAI-driven data augmentation in wireless networks. We first briefly review data augmentation techniques, discuss their limitations in wireless networks, and introduce generative data augmentation, including reviewing GenAI models and their applications in data augmentation. We then explore the application prospects of GenAI-driven data augmentation in wireless networks from the physical, network, and application layers, which provides a GenAI-driven data augmentation architecture for each application. Subsequently, we propose a general generative diffusion model-based data augmentation framework for Wi-Fi gesture recognition, which uses transformer-based diffusion models to generate high-quality channel state information data. Furthermore, we develop residual neural network models for Wi-Fi gesture recognition to evaluate the role of augmented data and conduct a case study based on a real dataset. Simulation results demonstrate the effectiveness of the proposed framework. Finally, we discuss research directions for generative data augmentation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08341v1-abstract-full').style.display = 'none'; document.getElementById('2411.08341v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.07985">arXiv:2411.07985</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.07985">pdf</a>, <a href="https://arxiv.org/ps/2411.07985">ps</a>, <a href="https://arxiv.org/format/2411.07985">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Combinatorics">math.CO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Discrete Mathematics">cs.DM</span> </div> </div> <p class="title is-5 mathjax"> Largest component in Boolean sublattices </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Galliano%2C+J">Julian Galliano</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+R+J">Ross J. Kang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.07985v1-abstract-short" style="display: inline;"> For a subfamily ${F}\subseteq 2^{[n]}$ of the Boolean lattice, consider the graph $G_{F}$ on ${F}$ based on the pairwise inclusion relations among its members. Given a positive integer $t$, how large can ${F}$ be before $G_{F}$ must contain some component of order greater than $t$? For $t=1$, this question was answered exactly almost a century ago by Sperner: the size of a middle layer of the Bool&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07985v1-abstract-full').style.display = 'inline'; document.getElementById('2411.07985v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.07985v1-abstract-full" style="display: none;"> For a subfamily ${F}\subseteq 2^{[n]}$ of the Boolean lattice, consider the graph $G_{F}$ on ${F}$ based on the pairwise inclusion relations among its members. Given a positive integer $t$, how large can ${F}$ be before $G_{F}$ must contain some component of order greater than $t$? For $t=1$, this question was answered exactly almost a century ago by Sperner: the size of a middle layer of the Boolean lattice. For $t=2^n$, this question is trivial. We are interested in what happens between these two extremes. For $t=2^{g}$ with $g=g(n)$ being any integer function that satisfies $g(n)=o(n/\log n)$ as $n\to\infty$, we give an asymptotically sharp answer to the above question: not much larger than the size of a middle layer. This constitutes a nontrivial generalisation of Sperner&#39;s theorem. We do so by a reduction to a Tur谩n-type problem for rainbow cycles in properly edge-coloured graphs. Among other results, we also give a sharp answer to the question, how large can ${F}$ be before $G_{F}$ must be connected? <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07985v1-abstract-full').style.display = 'none'; document.getElementById('2411.07985v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">23 pages, 2 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 05D05; 06A07; 05C35 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.07563">arXiv:2411.07563</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.07563">pdf</a>, <a href="https://arxiv.org/format/2411.07563">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Improving Grapheme-to-Phoneme Conversion through In-Context Knowledge Retrieval with Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Han%2C+D">Dongrui Han</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+M">Mingyu Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+J">Jiawen Kang</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+X">Xixin Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+X">Xunying Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Meng%2C+H">Helen Meng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.07563v1-abstract-short" style="display: inline;"> Grapheme-to-phoneme (G2P) conversion is a crucial step in Text-to-Speech (TTS) systems, responsible for mapping grapheme to corresponding phonetic representations. However, it faces ambiguities problems where the same grapheme can represent multiple phonemes depending on contexts, posing a challenge for G2P conversion. Inspired by the remarkable success of Large Language Models (LLMs) in handling&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07563v1-abstract-full').style.display = 'inline'; document.getElementById('2411.07563v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.07563v1-abstract-full" style="display: none;"> Grapheme-to-phoneme (G2P) conversion is a crucial step in Text-to-Speech (TTS) systems, responsible for mapping grapheme to corresponding phonetic representations. However, it faces ambiguities problems where the same grapheme can represent multiple phonemes depending on contexts, posing a challenge for G2P conversion. Inspired by the remarkable success of Large Language Models (LLMs) in handling context-aware scenarios, contextual G2P conversion systems with LLMs&#39; in-context knowledge retrieval (ICKR) capabilities are proposed to promote disambiguation capability. The efficacy of incorporating ICKR into G2P conversion systems is demonstrated thoroughly on the Librig2p dataset. In particular, the best contextual G2P conversion system using ICKR outperforms the baseline with weighted average phoneme error rate (PER) reductions of 2.0% absolute (28.9% relative). Using GPT-4 in the ICKR system can increase of 3.5% absolute (3.8% relative) on the Librig2p dataset. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07563v1-abstract-full').style.display = 'none'; document.getElementById('2411.07563v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">accepted by ISCSLP 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.04473">arXiv:2411.04473</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.04473">pdf</a>, <a href="https://arxiv.org/format/2411.04473">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> ML-Promise: A Multilingual Dataset for Corporate Promise Verification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Seki%2C+Y">Yohei Seki</a>, <a href="/search/cs?searchtype=author&amp;query=Shu%2C+H">Hakusen Shu</a>, <a href="/search/cs?searchtype=author&amp;query=Lhuissier%2C+A">Ana茂s Lhuissier</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+H">Hanwool Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+J">Juyeon Kang</a>, <a href="/search/cs?searchtype=author&amp;query=Day%2C+M">Min-Yuh Day</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+C">Chung-Chi Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.04473v1-abstract-short" style="display: inline;"> Promises made by politicians, corporate leaders, and public figures have a significant impact on public perception, trust, and institutional reputation. However, the complexity and volume of such commitments, coupled with difficulties in verifying their fulfillment, necessitate innovative methods for assessing their credibility. This paper introduces the concept of Promise Verification, a systemat&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04473v1-abstract-full').style.display = 'inline'; document.getElementById('2411.04473v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.04473v1-abstract-full" style="display: none;"> Promises made by politicians, corporate leaders, and public figures have a significant impact on public perception, trust, and institutional reputation. However, the complexity and volume of such commitments, coupled with difficulties in verifying their fulfillment, necessitate innovative methods for assessing their credibility. This paper introduces the concept of Promise Verification, a systematic approach involving steps such as promise identification, evidence assessment, and the evaluation of timing for verification. We propose the first multilingual dataset, ML-Promise, which includes English, French, Chinese, Japanese, and Korean, aimed at facilitating in-depth verification of promises, particularly in the context of Environmental, Social, and Governance (ESG) reports. Given the growing emphasis on corporate environmental contributions, this dataset addresses the challenge of evaluating corporate promises, especially in light of practices like greenwashing. Our findings also explore textual and image-based baselines, with promising results from retrieval-augmented generation (RAG) approaches. This work aims to foster further discourse on the accountability of public commitments across multiple languages and domains. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04473v1-abstract-full').style.display = 'none'; document.getElementById('2411.04473v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.2.7 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.04139">arXiv:2411.04139</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.04139">pdf</a>, <a href="https://arxiv.org/format/2411.04139">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Diffusion-based Auction Mechanism for Efficient Resource Management in 6G-enabled Vehicular Metaverses </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Kang%2C+J">Jiawen Kang</a>, <a href="/search/cs?searchtype=author&amp;query=Tong%2C+Y">Yongju Tong</a>, <a href="/search/cs?searchtype=author&amp;query=Zhong%2C+Y">Yue Zhong</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+J">Junlong Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+M">Minrui Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Niyato%2C+D">Dusit Niyato</a>, <a href="/search/cs?searchtype=author&amp;query=Deng%2C+R">Runrong Deng</a>, <a href="/search/cs?searchtype=author&amp;query=Mao%2C+S">Shiwen Mao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.04139v1-abstract-short" style="display: inline;"> The rise of 6G-enable Vehicular Metaverses is transforming the automotive industry by integrating immersive, real-time vehicular services through ultra-low latency and high bandwidth connectivity. In 6G-enable Vehicular Metaverses, vehicles are represented by Vehicle Twins (VTs), which serve as digital replicas of physical vehicles to support real-time vehicular applications such as large Artifici&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04139v1-abstract-full').style.display = 'inline'; document.getElementById('2411.04139v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.04139v1-abstract-full" style="display: none;"> The rise of 6G-enable Vehicular Metaverses is transforming the automotive industry by integrating immersive, real-time vehicular services through ultra-low latency and high bandwidth connectivity. In 6G-enable Vehicular Metaverses, vehicles are represented by Vehicle Twins (VTs), which serve as digital replicas of physical vehicles to support real-time vehicular applications such as large Artificial Intelligence (AI) model-based Augmented Reality (AR) navigation, called VT tasks. VT tasks are resource-intensive and need to be offloaded to ground Base Stations (BSs) for fast processing. However, high demand for VT tasks and limited resources of ground BSs, pose significant resource allocation challenges, particularly in densely populated urban areas like intersections. As a promising solution, Unmanned Aerial Vehicles (UAVs) act as aerial edge servers to dynamically assist ground BSs in handling VT tasks, relieving resource pressure on ground BSs. However, due to high mobility of UAVs, there exists information asymmetry regarding VT task demands between UAVs and ground BSs, resulting in inefficient resource allocation of UAVs. To address these challenges, we propose a learning-based Modified Second-Bid (MSB) auction mechanism to optimize resource allocation between ground BSs and UAVs by accounting for VT task latency and accuracy. Moreover, we design a diffusion-based reinforcement learning algorithm to optimize the price scaling factor, maximizing the total surplus of resource providers and minimizing VT task latency. Finally, simulation results demonstrate that the proposed diffusion-based MSB auction outperforms traditional baselines, providing better resource distribution and enhanced service quality for vehicular users. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04139v1-abstract-full').style.display = 'none'; document.getElementById('2411.04139v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.01996">arXiv:2411.01996</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.01996">pdf</a>, <a href="https://arxiv.org/format/2411.01996">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Culinary Class Wars: Evaluating LLMs using ASH in Cuisine Transfer Task </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lee%2C+H">Hoonick Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Gim%2C+M">Mogan Gim</a>, <a href="/search/cs?searchtype=author&amp;query=Park%2C+D">Donghyeon Park</a>, <a href="/search/cs?searchtype=author&amp;query=Choi%2C+D">Donghee Choi</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+J">Jaewoo Kang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.01996v1-abstract-short" style="display: inline;"> The advent of Large Language Models (LLMs) have shown promise in various creative domains, including culinary arts. However, many LLMs still struggle to deliver the desired level of culinary creativity, especially when tasked with adapting recipes to meet specific cultural requirements. This study focuses on cuisine transfer-applying elements of one cuisine to another-to assess LLMs&#39; culinary crea&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.01996v1-abstract-full').style.display = 'inline'; document.getElementById('2411.01996v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.01996v1-abstract-full" style="display: none;"> The advent of Large Language Models (LLMs) have shown promise in various creative domains, including culinary arts. However, many LLMs still struggle to deliver the desired level of culinary creativity, especially when tasked with adapting recipes to meet specific cultural requirements. This study focuses on cuisine transfer-applying elements of one cuisine to another-to assess LLMs&#39; culinary creativity. We employ a diverse set of LLMs to generate and evaluate culturally adapted recipes, comparing their evaluations against LLM and human judgments. We introduce the ASH (authenticity, sensitivity, harmony) benchmark to evaluate LLMs&#39; recipe generation abilities in the cuisine transfer task, assessing their cultural accuracy and creativity in the culinary domain. Our findings reveal crucial insights into both generative and evaluative capabilities of LLMs in the culinary domain, highlighting strengths and limitations in understanding and applying cultural nuances in recipe creation. The code and dataset used in this project will be openly available in \url{http://github.com/dmis-lab/CulinaryASH}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.01996v1-abstract-full').style.display = 'none'; document.getElementById('2411.01996v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.01410">arXiv:2411.01410</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.01410">pdf</a>, <a href="https://arxiv.org/format/2411.01410">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> </div> </div> <p class="title is-5 mathjax"> PageRank Bandits for Link Prediction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Ban%2C+Y">Yikun Ban</a>, <a href="/search/cs?searchtype=author&amp;query=Zou%2C+J">Jiaru Zou</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Z">Zihao Li</a>, <a href="/search/cs?searchtype=author&amp;query=Qi%2C+Y">Yunzhe Qi</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+D">Dongqi Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+J">Jian Kang</a>, <a href="/search/cs?searchtype=author&amp;query=Tong%2C+H">Hanghang Tong</a>, <a href="/search/cs?searchtype=author&amp;query=He%2C+J">Jingrui He</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.01410v1-abstract-short" style="display: inline;"> Link prediction is a critical problem in graph learning with broad applications such as recommender systems and knowledge graph completion. Numerous research efforts have been directed at solving this problem, including approaches based on similarity metrics and Graph Neural Networks (GNN). However, most existing solutions are still rooted in conventional supervised learning, which makes it challe&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.01410v1-abstract-full').style.display = 'inline'; document.getElementById('2411.01410v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.01410v1-abstract-full" style="display: none;"> Link prediction is a critical problem in graph learning with broad applications such as recommender systems and knowledge graph completion. Numerous research efforts have been directed at solving this problem, including approaches based on similarity metrics and Graph Neural Networks (GNN). However, most existing solutions are still rooted in conventional supervised learning, which makes it challenging to adapt over time to changing customer interests and to address the inherent dilemma of exploitation versus exploration in link prediction. To tackle these challenges, this paper reformulates link prediction as a sequential decision-making process, where each link prediction interaction occurs sequentially. We propose a novel fusion algorithm, PRB (PageRank Bandits), which is the first to combine contextual bandits with PageRank for collaborative exploitation and exploration. We also introduce a new reward formulation and provide a theoretical performance guarantee for PRB. Finally, we extensively evaluate PRB in both online and offline settings, comparing it with bandit-based and graph-based methods. The empirical success of PRB demonstrates the value of the proposed fusion approach. Our code is released at https://github.com/jiaruzouu/PRB. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.01410v1-abstract-full').style.display = 'none'; document.getElementById('2411.01410v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to NeurIPS 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.00300">arXiv:2411.00300</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.00300">pdf</a>, <a href="https://arxiv.org/format/2411.00300">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Rationale-Guided Retrieval Augmented Generation for Medical Question Answering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Sohn%2C+J">Jiwoong Sohn</a>, <a href="/search/cs?searchtype=author&amp;query=Park%2C+Y">Yein Park</a>, <a href="/search/cs?searchtype=author&amp;query=Yoon%2C+C">Chanwoong Yoon</a>, <a href="/search/cs?searchtype=author&amp;query=Park%2C+S">Sihyeon Park</a>, <a href="/search/cs?searchtype=author&amp;query=Hwang%2C+H">Hyeon Hwang</a>, <a href="/search/cs?searchtype=author&amp;query=Sung%2C+M">Mujeen Sung</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+H">Hyunjae Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+J">Jaewoo Kang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.00300v1-abstract-short" style="display: inline;"> Large language models (LLM) hold significant potential for applications in biomedicine, but they struggle with hallucinations and outdated knowledge. While retrieval-augmented generation (RAG) is generally employed to address these issues, it also has its own set of challenges: (1) LLMs are vulnerable to irrelevant or incorrect context, (2) medical queries are often not well-targeted for helpful i&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.00300v1-abstract-full').style.display = 'inline'; document.getElementById('2411.00300v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.00300v1-abstract-full" style="display: none;"> Large language models (LLM) hold significant potential for applications in biomedicine, but they struggle with hallucinations and outdated knowledge. While retrieval-augmented generation (RAG) is generally employed to address these issues, it also has its own set of challenges: (1) LLMs are vulnerable to irrelevant or incorrect context, (2) medical queries are often not well-targeted for helpful information, and (3) retrievers are prone to bias toward the specific source corpus they were trained on. In this study, we present RAG$^2$ (RAtionale-Guided RAG), a new framework for enhancing the reliability of RAG in biomedical contexts. RAG$^2$ incorporates three key innovations: a small filtering model trained on perplexity-based labels of rationales, which selectively augments informative snippets of documents while filtering out distractors; LLM-generated rationales as queries to improve the utility of retrieved snippets; a structure designed to retrieve snippets evenly from a comprehensive set of four biomedical corpora, effectively mitigating retriever bias. Our experiments demonstrate that RAG$^2$ improves the state-of-the-art LLMs of varying sizes, with improvements of up to 6.1\%, and it outperforms the previous best medical RAG model by up to 5.6\% across three medical question-answering benchmarks. Our code is available at https://github.com/dmis-lab/RAG2. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.00300v1-abstract-full').style.display = 'none'; document.getElementById('2411.00300v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.00257">arXiv:2411.00257</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.00257">pdf</a>, <a href="https://arxiv.org/format/2411.00257">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Understanding Graphical Perception in Data Visualization through Zero-shot Prompting of Vision-Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Guo%2C+G">Grace Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+J+J">Jenna Jiayi Kang</a>, <a href="/search/cs?searchtype=author&amp;query=Shah%2C+R+S">Raj Sanjay Shah</a>, <a href="/search/cs?searchtype=author&amp;query=Pfister%2C+H">Hanspeter Pfister</a>, <a href="/search/cs?searchtype=author&amp;query=Varma%2C+S">Sashank Varma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.00257v1-abstract-short" style="display: inline;"> Vision Language Models (VLMs) have been successful at many chart comprehension tasks that require attending to both the images of charts and their accompanying textual descriptions. However, it is not well established how VLM performance profiles map to human-like behaviors. If VLMs can be shown to have human-like chart comprehension abilities, they can then be applied to a broader range of tasks,&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.00257v1-abstract-full').style.display = 'inline'; document.getElementById('2411.00257v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.00257v1-abstract-full" style="display: none;"> Vision Language Models (VLMs) have been successful at many chart comprehension tasks that require attending to both the images of charts and their accompanying textual descriptions. However, it is not well established how VLM performance profiles map to human-like behaviors. If VLMs can be shown to have human-like chart comprehension abilities, they can then be applied to a broader range of tasks, such as designing and evaluating visualizations for human readers. This paper lays the foundations for such applications by evaluating the accuracy of zero-shot prompting of VLMs on graphical perception tasks with established human performance profiles. Our findings reveal that VLMs perform similarly to humans under specific task and style combinations, suggesting that they have the potential to be used for modeling human performance. Additionally, variations to the input stimuli show that VLM accuracy is sensitive to stylistic changes such as fill color and chart contiguity, even when the underlying data and data mappings are the same. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.00257v1-abstract-full').style.display = 'none'; document.getElementById('2411.00257v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.23824">arXiv:2410.23824</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.23824">pdf</a>, <a href="https://arxiv.org/format/2410.23824">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Generative AI-Powered Plugin for Robust Federated Learning in Heterogeneous IoT Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lee%2C+Y">Youngjoon Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Gong%2C+J">Jinu Gong</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+J">Joonhyuk Kang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.23824v1-abstract-short" style="display: inline;"> Federated learning enables edge devices to collaboratively train a global model while maintaining data privacy by keeping data localized. However, the Non-IID nature of data distribution across devices often hinders model convergence and reduces performance. In this paper, we propose a novel plugin for federated optimization techniques that approximates Non-IID data distributions to IID through ge&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.23824v1-abstract-full').style.display = 'inline'; document.getElementById('2410.23824v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.23824v1-abstract-full" style="display: none;"> Federated learning enables edge devices to collaboratively train a global model while maintaining data privacy by keeping data localized. However, the Non-IID nature of data distribution across devices often hinders model convergence and reduces performance. In this paper, we propose a novel plugin for federated optimization techniques that approximates Non-IID data distributions to IID through generative AI-enhanced data augmentation and balanced sampling strategy. Key idea is to synthesize additional data for underrepresented classes on each edge device, leveraging generative AI to create a more balanced dataset across the FL network. Additionally, a balanced sampling approach at the central server selectively includes only the most IID-like devices, accelerating convergence while maximizing the global model&#39;s performance. Experimental results validate that our approach significantly improves convergence speed and robustness against data imbalance, establishing a flexible, privacy-preserving FL plugin that is applicable even in data-scarce environments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.23824v1-abstract-full').style.display = 'none'; document.getElementById('2410.23824v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.20660">arXiv:2410.20660</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.20660">pdf</a>, <a href="https://arxiv.org/format/2410.20660">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> </div> </div> <p class="title is-5 mathjax"> TurboHopp: Accelerated Molecule Scaffold Hopping with Consistency Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yoo%2C+K">Kiwoong Yoo</a>, <a href="/search/cs?searchtype=author&amp;query=Oertell%2C+O">Owen Oertell</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+J">Junhyun Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+S">Sanghoon Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+J">Jaewoo Kang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.20660v1-abstract-short" style="display: inline;"> Navigating the vast chemical space of druggable compounds is a formidable challenge in drug discovery, where generative models are increasingly employed to identify viable candidates. Conditional 3D structure-based drug design (3D-SBDD) models, which take into account complex three-dimensional interactions and molecular geometries, are particularly promising. Scaffold hopping is an efficient strat&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.20660v1-abstract-full').style.display = 'inline'; document.getElementById('2410.20660v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.20660v1-abstract-full" style="display: none;"> Navigating the vast chemical space of druggable compounds is a formidable challenge in drug discovery, where generative models are increasingly employed to identify viable candidates. Conditional 3D structure-based drug design (3D-SBDD) models, which take into account complex three-dimensional interactions and molecular geometries, are particularly promising. Scaffold hopping is an efficient strategy that facilitates the identification of similar active compounds by strategically modifying the core structure of molecules, effectively narrowing the wide chemical space and enhancing the discovery of drug-like products. However, the practical application of 3D-SBDD generative models is hampered by their slow processing speeds. To address this bottleneck, we introduce TurboHopp, an accelerated pocket-conditioned 3D scaffold hopping model that merges the strategic effectiveness of traditional scaffold hopping with rapid generation capabilities of consistency models. This synergy not only enhances efficiency but also significantly boosts generation speeds, achieving up to 30 times faster inference speed as well as superior generation quality compared to existing diffusion-based models, establishing TurboHopp as a powerful tool in drug discovery. Supported by faster inference speed, we further optimize our model, using Reinforcement Learning for Consistency Models (RLCM), to output desirable molecules. We demonstrate the broad applicability of TurboHopp across multiple drug discovery scenarios, underscoring its potential in diverse molecular settings. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.20660v1-abstract-full').style.display = 'none'; document.getElementById('2410.20660v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">22 pages, 11 figures, 8 tables. Presented at NeurIPS 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.19665">arXiv:2410.19665</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.19665">pdf</a>, <a href="https://arxiv.org/format/2410.19665">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> </div> </div> <p class="title is-5 mathjax"> MetaTrading: An Immersion-Aware Model Trading Framework for Vehicular Metaverse Services </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wu%2C+H">Hongjia Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Zeng%2C+H">Hui Zeng</a>, <a href="/search/cs?searchtype=author&amp;query=Xiong%2C+Z">Zehui Xiong</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+J">Jiawen Kang</a>, <a href="/search/cs?searchtype=author&amp;query=Cai%2C+Z">Zhiping Cai</a>, <a href="/search/cs?searchtype=author&amp;query=Chan%2C+T">Tse-Tin Chan</a>, <a href="/search/cs?searchtype=author&amp;query=Niyato%2C+D">Dusit Niyato</a>, <a href="/search/cs?searchtype=author&amp;query=Han%2C+Z">Zhu Han</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.19665v1-abstract-short" style="display: inline;"> Updates of extensive Internet of Things (IoT) data are critical to the immersion of vehicular metaverse services. However, providing high-quality and sustainable data in unstable and resource-constrained vehicular networks remains a significant challenge. To address this problem, we put forth a novel immersion-aware model trading framework that incentivizes metaverse users (MUs) to contribute lear&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.19665v1-abstract-full').style.display = 'inline'; document.getElementById('2410.19665v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.19665v1-abstract-full" style="display: none;"> Updates of extensive Internet of Things (IoT) data are critical to the immersion of vehicular metaverse services. However, providing high-quality and sustainable data in unstable and resource-constrained vehicular networks remains a significant challenge. To address this problem, we put forth a novel immersion-aware model trading framework that incentivizes metaverse users (MUs) to contribute learning models trained by their latest local data for augmented reality (AR) services in the vehicular metaverse, while preserving their privacy through federated learning. To comprehensively evaluate the contribution of locally trained learning models provided by MUs to AR services, we design a new immersion metric that captures service immersion by considering the freshness and accuracy of learning models, as well as the amount and potential value of raw data used for training. We model the trading interactions between metaverse service providers (MSPs) and MUs as an equilibrium problem with equilibrium constraints (EPEC) to analyze and balance their costs and gains. Moreover, considering dynamic network conditions and privacy concerns, we formulate the reward decisions of MSPs as a multi-agent Markov decision process. Then, a fully distributed dynamic reward method based on deep reinforcement learning is presented, which operates without any private information about MUs and other MSPs. Experimental results demonstrate that the proposed framework can effectively provide higher-value models for object detection and classification in AR services on real AR-related vehicle datasets compared to benchmark schemes. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.19665v1-abstract-full').style.display = 'none'; document.getElementById('2410.19665v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.18652">arXiv:2410.18652</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.18652">pdf</a>, <a href="https://arxiv.org/format/2410.18652">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> $C^2$: Scalable Auto-Feedback for LLM-based Chart Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Koh%2C+W">Woosung Koh</a>, <a href="/search/cs?searchtype=author&amp;query=Yoon%2C+J+H">Jang Han Yoon</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+M">MinHyung Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+Y">Youngjin Song</a>, <a href="/search/cs?searchtype=author&amp;query=Cho%2C+J">Jaegwan Cho</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+J">Jaehyun Kang</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+T">Taehyeon Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Yun%2C+S">Se-young Yun</a>, <a href="/search/cs?searchtype=author&amp;query=Yu%2C+Y">Youngjae Yu</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+B">Bongshin Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.18652v2-abstract-short" style="display: inline;"> Generating high-quality charts with Large Language Models presents significant challenges due to limited data and the high cost of scaling through human curation. Instruction, data, and code triplets are scarce and expensive to manually curate as their creation demands technical expertise. To address this scalability issue, we introduce a reference-free automatic feedback generator, which eliminat&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18652v2-abstract-full').style.display = 'inline'; document.getElementById('2410.18652v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.18652v2-abstract-full" style="display: none;"> Generating high-quality charts with Large Language Models presents significant challenges due to limited data and the high cost of scaling through human curation. Instruction, data, and code triplets are scarce and expensive to manually curate as their creation demands technical expertise. To address this scalability issue, we introduce a reference-free automatic feedback generator, which eliminates the need for costly human intervention. Our novel framework, $C^2$, consists of (1) an automatic feedback provider (ChartAF) and (2) a diverse, reference-free dataset (ChartUIE-8K). Quantitative results are compelling: in our first experiment, 74% of respondents strongly preferred, and 10% preferred, the results after feedback. The second post-feedback experiment demonstrates that ChartAF outperforms nine baselines. Moreover, ChartUIE-8K significantly improves data diversity by increasing queries, datasets, and chart types by 5982%, 1936%, and 91%, respectively, over benchmarks. Finally, an LLM user study revealed that 94% of participants preferred ChartUIE-8K&#39;s queries, with 93% deeming them aligned with real-world use cases. Core contributions are available as open-source at an anonymized project site, with ample qualitative examples. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18652v2-abstract-full').style.display = 'none'; document.getElementById('2410.18652v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Preprint</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.16848">arXiv:2410.16848</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.16848">pdf</a>, <a href="https://arxiv.org/format/2410.16848">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> ETHIC: Evaluating Large Language Models on Long-Context Tasks with High Information Coverage </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lee%2C+T">Taewhoo Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Yoon%2C+C">Chanwoong Yoon</a>, <a href="/search/cs?searchtype=author&amp;query=Jang%2C+K">Kyochul Jang</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+D">Donghyeon Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+M">Minju Song</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+H">Hyunjae Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+J">Jaewoo Kang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.16848v1-abstract-short" style="display: inline;"> Recent advancements in large language models (LLM) capable of processing extremely long texts highlight the need for a dedicated evaluation benchmark to assess their long-context capabilities. However, existing methods, like the needle-in-a-haystack test, do not effectively assess whether these models fully utilize contextual information, raising concerns about the reliability of current evaluatio&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.16848v1-abstract-full').style.display = 'inline'; document.getElementById('2410.16848v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.16848v1-abstract-full" style="display: none;"> Recent advancements in large language models (LLM) capable of processing extremely long texts highlight the need for a dedicated evaluation benchmark to assess their long-context capabilities. However, existing methods, like the needle-in-a-haystack test, do not effectively assess whether these models fully utilize contextual information, raising concerns about the reliability of current evaluation techniques. To thoroughly examine the effectiveness of existing benchmarks, we introduce a new metric called information coverage (IC), which quantifies the proportion of the input context necessary for answering queries. Our findings indicate that current benchmarks exhibit low IC; although the input context may be extensive, the actual usable context is often limited. To address this, we present ETHIC, a novel benchmark designed to assess LLMs&#39; ability to leverage the entire context. Our benchmark comprises 2,648 test instances spanning four long-context tasks with high IC scores in the domains of books, debates, medicine, and law. Our evaluations reveal significant performance drops in contemporary LLMs, highlighting a critical challenge in managing long contexts. Our benchmark is available at https://github.com/dmis-lab/ETHIC. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.16848v1-abstract-full').style.display = 'none'; document.getElementById('2410.16848v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">15 pages, 5 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.14577">arXiv:2410.14577</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.14577">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Reimagining partial thickness keratoplasty: An eye mountable robot for autonomous big bubble needle insertion </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Y. Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Opfermann%2C+J+D">J. D. Opfermann</a>, <a href="/search/cs?searchtype=author&amp;query=Yu%2C+J">J. Yu</a>, <a href="/search/cs?searchtype=author&amp;query=Yi%2C+H">H. Yi</a>, <a href="/search/cs?searchtype=author&amp;query=Kaluna%2C+J">J. Kaluna</a>, <a href="/search/cs?searchtype=author&amp;query=Biswas%2C+R">R. Biswas</a>, <a href="/search/cs?searchtype=author&amp;query=Zuo%2C+R">R. Zuo</a>, <a href="/search/cs?searchtype=author&amp;query=Gensheimer%2C+W">W. Gensheimer</a>, <a href="/search/cs?searchtype=author&amp;query=Krieger%2C+A">A. Krieger</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+J+U">J. U. Kang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.14577v1-abstract-short" style="display: inline;"> Autonomous surgical robots have demonstrated significant potential to standardize surgical outcomes, driving innovations that enhance safety and consistency regardless of individual surgeon experience. Deep anterior lamellar keratoplasty (DALK), a partial thickness corneal transplant surgery aimed at replacing the anterior part of cornea above Descemet membrane (DM), would greatly benefit from an&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.14577v1-abstract-full').style.display = 'inline'; document.getElementById('2410.14577v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.14577v1-abstract-full" style="display: none;"> Autonomous surgical robots have demonstrated significant potential to standardize surgical outcomes, driving innovations that enhance safety and consistency regardless of individual surgeon experience. Deep anterior lamellar keratoplasty (DALK), a partial thickness corneal transplant surgery aimed at replacing the anterior part of cornea above Descemet membrane (DM), would greatly benefit from an autonomous surgical approach as it highly relies on surgeon skill with high perforation rates. In this study, we proposed a novel autonomous surgical robotic system (AUTO-DALK) based on a customized neural network capable of precise needle control and consistent big bubble demarcation on cadaver and live rabbit models. We demonstrate the feasibility of an AI-based image-guided vertical drilling approach for big bubble generation, in contrast to the conventional horizontal needle approach. Our system integrates an optical coherence tomography (OCT) fiber optic distal sensor into the eye-mountable micro robotic system, which automatically segments OCT M-mode depth signals to identify corneal layers using a custom deep learning algorithm. It enables the robot to autonomously guide the needle to targeted tissue layers via a depth-controlled feedback loop. We compared autonomous needle insertion performance and resulting pneumo-dissection using AUTO-DALK against 1) freehand insertion, 2) OCT sensor guided manual insertion, and 3) teleoperated robotic insertion, reporting significant improvements in insertion depth, pneumo-dissection depth, task completion time, and big bubble formation. Ex vivo and in vivo results indicate that the AI-driven, AUTO-DALK system, is a promising solution to standardize pneumo-dissection outcomes for partial thickness keratoplasty. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.14577v1-abstract-full').style.display = 'none'; document.getElementById('2410.14577v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.09870">arXiv:2410.09870</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.09870">pdf</a>, <a href="https://arxiv.org/format/2410.09870">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> ChroKnowledge: Unveiling Chronological Knowledge of Language Models in Multiple Domains </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Park%2C+Y">Yein Park</a>, <a href="/search/cs?searchtype=author&amp;query=Yoon%2C+C">Chanwoong Yoon</a>, <a href="/search/cs?searchtype=author&amp;query=Park%2C+J">Jungwoo Park</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+D">Donghyeon Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Jeong%2C+M">Minbyul Jeong</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+J">Jaewoo Kang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.09870v1-abstract-short" style="display: inline;"> Large language models (LLMs) have significantly impacted many aspects of our lives. However, assessing and ensuring their chronological knowledge remains challenging. Existing approaches fall short in addressing the accumulative nature of knowledge, often relying on a single time stamp. To overcome this, we introduce ChroKnowBench, a benchmark dataset designed to evaluate chronologically accumulat&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.09870v1-abstract-full').style.display = 'inline'; document.getElementById('2410.09870v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.09870v1-abstract-full" style="display: none;"> Large language models (LLMs) have significantly impacted many aspects of our lives. However, assessing and ensuring their chronological knowledge remains challenging. Existing approaches fall short in addressing the accumulative nature of knowledge, often relying on a single time stamp. To overcome this, we introduce ChroKnowBench, a benchmark dataset designed to evaluate chronologically accumulated knowledge across three key aspects: multiple domains, time dependency, temporal state. Our benchmark distinguishes between knowledge that evolves (e.g., scientific discoveries, amended laws) and knowledge that remain constant (e.g., mathematical truths, commonsense facts). Building on this benchmark, we present ChroKnowledge (Chronological Categorization of Knowledge), a novel sampling-based framework for evaluating and updating LLMs&#39; non-parametric chronological knowledge. Our evaluation shows: (1) The ability of eliciting temporal knowledge varies depending on the data format that model was trained on. (2) LLMs partially recall knowledge or show a cut-off at temporal boundaries rather than recalling all aspects of knowledge correctly. Thus, we apply our ChroKnowPrompt, an in-depth prompting to elicit chronological knowledge by traversing step-by-step through the surrounding time spans. We observe that our framework successfully updates the overall knowledge across the entire timeline in both the biomedical domain (+11.9%) and the general domain (+2.8%), demonstrating its effectiveness in refining temporal knowledge. This non-parametric approach also enables knowledge updates not only in open-source models but also in proprietary LLMs, ensuring comprehensive applicability across model types. We perform a comprehensive analysis based on temporal characteristics of ChroKnowPrompt and validate the potential of various models to elicit intrinsic temporal knowledge through our method. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.09870v1-abstract-full').style.display = 'none'; document.getElementById('2410.09870v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.07969">arXiv:2410.07969</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.07969">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Digital Libraries">cs.DL</span> </div> </div> <p class="title is-5 mathjax"> PubMed knowledge graph 2.0: Connecting papers, patents, and clinical trials in biomedical science </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Xu%2C+J">Jian Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Yu%2C+C">Chao Yu</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+J">Jiawei Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Ding%2C+Y">Ying Ding</a>, <a href="/search/cs?searchtype=author&amp;query=Torvik%2C+V+I">Vetle I. Torvik</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+J">Jaewoo Kang</a>, <a href="/search/cs?searchtype=author&amp;query=Sung%2C+M">Mujeen Sung</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+M">Min Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.07969v1-abstract-short" style="display: inline;"> Papers, patents, and clinical trials are indispensable types of scientific literature in biomedicine, crucial for knowledge sharing and dissemination. However, these documents are often stored in disparate databases with varying management standards and data formats, making it challenging to form systematic, fine-grained connections among them. To address this issue, we introduce PKG2.0, a compreh&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.07969v1-abstract-full').style.display = 'inline'; document.getElementById('2410.07969v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.07969v1-abstract-full" style="display: none;"> Papers, patents, and clinical trials are indispensable types of scientific literature in biomedicine, crucial for knowledge sharing and dissemination. However, these documents are often stored in disparate databases with varying management standards and data formats, making it challenging to form systematic, fine-grained connections among them. To address this issue, we introduce PKG2.0, a comprehensive knowledge graph dataset encompassing over 36 million papers, 1.3 million patents, and 0.48 million clinical trials in the biomedical field. PKG2.0 integrates these previously dispersed resources through various links, including biomedical entities, author networks, citation relationships, and research projects. Fine-grained biomedical entity extraction, high-performance author name disambiguation, and multi-source citation integration have played a crucial role in the construction of the PKG dataset. Additionally, project data from the NIH Exporter enriches the dataset with metadata of NIH-funded projects and their scholarly outputs. Data validation demonstrates that PKG2.0 excels in key tasks such as author disambiguation and biomedical entity recognition. This dataset provides valuable resources for biomedical researchers, bibliometric scholars, and those engaged in literature mining. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.07969v1-abstract-full').style.display = 'none'; document.getElementById('2410.07969v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">31 pages, 6 figures, 22 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.07493">arXiv:2410.07493</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.07493">pdf</a>, <a href="https://arxiv.org/format/2410.07493">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Autonomous Robotic System with Optical Coherence Tomography Guidance for Vascular Anastomosis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Haworth%2C+J">Jesse Haworth</a>, <a href="/search/cs?searchtype=author&amp;query=Biswas%2C+R">Rishi Biswas</a>, <a href="/search/cs?searchtype=author&amp;query=Opfermann%2C+J">Justin Opfermann</a>, <a href="/search/cs?searchtype=author&amp;query=Kam%2C+M">Michael Kam</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Yaning Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Pantalone%2C+D">Desire Pantalone</a>, <a href="/search/cs?searchtype=author&amp;query=Creighton%2C+F+X">Francis X. Creighton</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+R">Robin Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+J+U">Jin U. Kang</a>, <a href="/search/cs?searchtype=author&amp;query=Krieger%2C+A">Axel Krieger</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.07493v1-abstract-short" style="display: inline;"> Vascular anastomosis, the surgical connection of blood vessels, is essential in procedures such as organ transplants and reconstructive surgeries. The precision required limits accessibility due to the extensive training needed, with manual suturing leading to variable outcomes and revision rates up to 7.9%. Existing robotic systems, while promising, are either fully teleoperated or lack the capab&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.07493v1-abstract-full').style.display = 'inline'; document.getElementById('2410.07493v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.07493v1-abstract-full" style="display: none;"> Vascular anastomosis, the surgical connection of blood vessels, is essential in procedures such as organ transplants and reconstructive surgeries. The precision required limits accessibility due to the extensive training needed, with manual suturing leading to variable outcomes and revision rates up to 7.9%. Existing robotic systems, while promising, are either fully teleoperated or lack the capabilities necessary for autonomous vascular anastomosis. We present the Micro Smart Tissue Autonomous Robot (micro-STAR), an autonomous robotic system designed to perform vascular anastomosis on small-diameter vessels. The micro-STAR system integrates a novel suturing tool equipped with Optical Coherence Tomography (OCT) fiber-optic sensor and a microcamera, enabling real-time tissue detection and classification. Our system autonomously places sutures and manipulates tissue with minimal human intervention. In an ex vivo study, micro-STAR achieved outcomes competitive with experienced surgeons in terms of leak pressure, lumen reduction, and suture placement variation, completing 90% of sutures without human intervention. This represents the first instance of a robotic system autonomously performing vascular anastomosis on real tissue, offering significant potential for improving surgical precision and expanding access to high-quality care. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.07493v1-abstract-full').style.display = 'none'; document.getElementById('2410.07493v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper was submitted to IEEE TMRB and is currently under review. There are 9 pages, 9 figures, and 2 tables</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 68T40: Robotics </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.03357">arXiv:2410.03357</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.03357">pdf</a>, <a href="https://arxiv.org/format/2410.03357">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Should Cross-Lingual AMR Parsing go Meta? An Empirical Assessment of Meta-Learning and Joint Learning AMR Parsing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Kang%2C+J">Jeongwoo Kang</a>, <a href="/search/cs?searchtype=author&amp;query=Coavoux%2C+M">Maximin Coavoux</a>, <a href="/search/cs?searchtype=author&amp;query=Lopez%2C+C">C茅dric Lopez</a>, <a href="/search/cs?searchtype=author&amp;query=Schwab%2C+D">Didier Schwab</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.03357v1-abstract-short" style="display: inline;"> Cross-lingual AMR parsing is the task of predicting AMR graphs in a target language when training data is available only in a source language. Due to the small size of AMR training data and evaluation data, cross-lingual AMR parsing has only been explored in a small set of languages such as English, Spanish, German, Chinese, and Italian. Taking inspiration from Langedijk et al. (2022), who apply m&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.03357v1-abstract-full').style.display = 'inline'; document.getElementById('2410.03357v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.03357v1-abstract-full" style="display: none;"> Cross-lingual AMR parsing is the task of predicting AMR graphs in a target language when training data is available only in a source language. Due to the small size of AMR training data and evaluation data, cross-lingual AMR parsing has only been explored in a small set of languages such as English, Spanish, German, Chinese, and Italian. Taking inspiration from Langedijk et al. (2022), who apply meta-learning to tackle cross-lingual syntactic parsing, we investigate the use of meta-learning for cross-lingual AMR parsing. We evaluate our models in $k$-shot scenarios (including 0-shot) and assess their effectiveness in Croatian, Farsi, Korean, Chinese, and French. Notably, Korean and Croatian test sets are developed as part of our work, based on the existing The Little Prince English AMR corpus, and made publicly available. We empirically study our method by comparing it to classical joint learning. Our findings suggest that while the meta-learning model performs slightly better in 0-shot evaluation for certain languages, the performance gain is minimal or absent when $k$ is higher than 0. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.03357v1-abstract-full').style.display = 'none'; document.getElementById('2410.03357v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">to appear in Findings of EMNLP 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.02110">arXiv:2410.02110</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.02110">pdf</a>, <a href="https://arxiv.org/format/2410.02110">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Can LLMs Reliably Simulate Human Learner Actions? A Simulation Authoring Framework for Open-Ended Learning Environments </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Mannekote%2C+A">Amogh Mannekote</a>, <a href="/search/cs?searchtype=author&amp;query=Davies%2C+A">Adam Davies</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+J">Jina Kang</a>, <a href="/search/cs?searchtype=author&amp;query=Boyer%2C+K+E">Kristy Elizabeth Boyer</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.02110v2-abstract-short" style="display: inline;"> Simulating learner actions helps stress-test open-ended interactive learning environments and prototype new adaptations before deployment. While recent studies show the promise of using large language models (LLMs) for simulating human behavior, such approaches have not gone beyond rudimentary proof-of-concept stages due to key limitations. First, LLMs are highly sensitive to minor prompt variatio&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02110v2-abstract-full').style.display = 'inline'; document.getElementById('2410.02110v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.02110v2-abstract-full" style="display: none;"> Simulating learner actions helps stress-test open-ended interactive learning environments and prototype new adaptations before deployment. While recent studies show the promise of using large language models (LLMs) for simulating human behavior, such approaches have not gone beyond rudimentary proof-of-concept stages due to key limitations. First, LLMs are highly sensitive to minor prompt variations, raising doubts about their ability to generalize to new scenarios without extensive prompt engineering. Moreover, apparently successful outcomes can often be unreliable, either because domain experts unintentionally guide LLMs to produce expected results, leading to self-fulfilling prophecies; or because the LLM has encountered highly similar scenarios in its training data, meaning that models may not be simulating behavior so much as regurgitating memorized content. To address these challenges, we propose Hyp-Mix, a simulation authoring framework that allows experts to develop and evaluate simulations by combining testable hypotheses about learner behavior. Testing this framework in a physics learning environment, we found that GPT-4 Turbo maintains calibrated behavior even as the underlying learner model changes, providing the first evidence that LLMs can be used to simulate realistic behaviors in open-ended interactive learning environments, a necessary prerequisite for useful LLM behavioral simulation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02110v2-abstract-full').style.display = 'none'; document.getElementById('2410.02110v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 2 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.01396">arXiv:2410.01396</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.01396">pdf</a>, <a href="https://arxiv.org/format/2410.01396">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Can We Delegate Learning to Automation?: A Comparative Study of LLM Chatbots, Search Engines, and Books </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yang%2C+Y">Yeonsun Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Shin%2C+A">Ahyeon Shin</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+M">Mincheol Kang</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+J">Jiheon Kang</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+J+Y">Jean Young Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.01396v1-abstract-short" style="display: inline;"> Learning is a key motivator behind information search behavior. With the emergence of LLM-based chatbots, students are increasingly turning to these tools as their primary resource for acquiring knowledge. However, the transition from traditional resources like textbooks and web searches raises concerns among educators. They worry that these fully-automated LLMs might lead students to delegate cri&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.01396v1-abstract-full').style.display = 'inline'; document.getElementById('2410.01396v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.01396v1-abstract-full" style="display: none;"> Learning is a key motivator behind information search behavior. With the emergence of LLM-based chatbots, students are increasingly turning to these tools as their primary resource for acquiring knowledge. However, the transition from traditional resources like textbooks and web searches raises concerns among educators. They worry that these fully-automated LLMs might lead students to delegate critical steps of search as learning. In this paper, we systematically uncover three main concerns from educators&#39; perspectives. In response to these concerns, we conducted a mixed-methods study with 92 university students to compare three learning sources with different automation levels. Our results show that LLMs support comprehensive understanding of key concepts without promoting passive learning, though their effectiveness in knowledge retention was limited. Additionally, we found that academic performance impacted both learning outcomes and search patterns. Notably, higher-competence learners engaged more deeply with content through reading-intensive behaviors rather than relying on search activities. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.01396v1-abstract-full').style.display = 'none'; document.getElementById('2410.01396v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">21 pages, 14 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> K.3.2 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.01176">arXiv:2410.01176</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.01176">pdf</a>, <a href="https://arxiv.org/format/2410.01176">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Generative Diffusion-based Contract Design for Efficient AI Twins Migration in Vehicular Embodied AI Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhong%2C+Y">Yue Zhong</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+J">Jiawen Kang</a>, <a href="/search/cs?searchtype=author&amp;query=Wen%2C+J">Jinbo Wen</a>, <a href="/search/cs?searchtype=author&amp;query=Ye%2C+D">Dongdong Ye</a>, <a href="/search/cs?searchtype=author&amp;query=Nie%2C+J">Jiangtian Nie</a>, <a href="/search/cs?searchtype=author&amp;query=Niyato%2C+D">Dusit Niyato</a>, <a href="/search/cs?searchtype=author&amp;query=Gao%2C+X">Xiaozheng Gao</a>, <a href="/search/cs?searchtype=author&amp;query=Xie%2C+S">Shengli Xie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.01176v1-abstract-short" style="display: inline;"> Embodied AI is a rapidly advancing field that bridges the gap between cyberspace and physical space, enabling a wide range of applications. This evolution has led to the development of the Vehicular Embodied AI NETwork (VEANET), where advanced AI capabilities are integrated into vehicular systems to enhance autonomous operations and decision-making. Embodied agents, such as Autonomous Vehicles (AV&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.01176v1-abstract-full').style.display = 'inline'; document.getElementById('2410.01176v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.01176v1-abstract-full" style="display: none;"> Embodied AI is a rapidly advancing field that bridges the gap between cyberspace and physical space, enabling a wide range of applications. This evolution has led to the development of the Vehicular Embodied AI NETwork (VEANET), where advanced AI capabilities are integrated into vehicular systems to enhance autonomous operations and decision-making. Embodied agents, such as Autonomous Vehicles (AVs), are autonomous entities that can perceive their environment and take actions to achieve specific goals, actively interacting with the physical world. Embodied twins are digital models of these embodied agents, with various embodied AI twins for intelligent applications in cyberspace. In VEANET, embodied AI twins act as in-vehicle AI assistants to perform diverse tasks supporting autonomous driving using generative AI models. Due to limited computational resources of AVs, these AVs often offload computationally intensive tasks, such as constructing and updating embodied AI twins, to nearby RSUs. However, since the rapid mobility of AVs and the limited provision coverage of a single RSU, embodied AI twins require dynamic migrations from current RSU to other RSUs in real-time, resulting in the challenge of selecting suitable RSUs for efficient embodied AI twins migrations. Given information asymmetry, AVs cannot know the detailed information of RSUs. To this end, in this paper, we construct a multi-dimensional contract theoretical model between AVs and alternative RSUs. Considering that AVs may exhibit irrational behavior, we utilize prospect theory instead of expected utility theory to model the actual utilities of AVs. Finally, we employ a generative diffusion model-based algorithm to identify the optimal contract designs. Compared with traditional deep reinforcement learning algorithms, numerical results demonstrate the effectiveness of the proposed scheme. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.01176v1-abstract-full').style.display = 'none'; document.getElementById('2410.01176v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.00667">arXiv:2410.00667</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.00667">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Classical Physics">physics.class-ph</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1016/j.jenvman.2024.123321">10.1016/j.jenvman.2024.123321 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Contribution of soundscape appropriateness to soundscape quality assessment in space: a mediating variable affecting acoustic comfort </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yang%2C+X">Xinhao Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+G">Guangyu Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Lu%2C+X">Xiaodong Lu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Y">Yuan Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+J">Jian Kang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.00667v2-abstract-short" style="display: inline;"> Soundscape appropriateness (SA) provides supplemental information on the matching degree between auditory information and the surrounding scene in soundscape perception. This indicator has been integrated into the standard ISO process for collecting soundscape data, forming a component of the sound quality assessment questionnaire. However, its role in soundscape quality assessment has not been fu&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.00667v2-abstract-full').style.display = 'inline'; document.getElementById('2410.00667v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.00667v2-abstract-full" style="display: none;"> Soundscape appropriateness (SA) provides supplemental information on the matching degree between auditory information and the surrounding scene in soundscape perception. This indicator has been integrated into the standard ISO process for collecting soundscape data, forming a component of the sound quality assessment questionnaire. However, its role in soundscape quality assessment has not been fully understood. Herein, we present the findings from soundscape data collected from Beiling Park in Shenyang, China. A method was developed that integrates mediation effect models with multiscale geographically weighted regression models to explore the mediating role of SA in the impact of sound source types on soundscape quality, as well as the spatial heterogeneity of this mediation effect. The results confirm that SA does mediates the influence of sound source types on acoustics comfort (AC). Specifically, natural sounds (indirect effect/total effect = .19/.19), traffic sounds (indirect effect/total effect = -.46/-.65), and commercial sounds (indirect effect/total effect = -.25/-.12) impact the perception of AC by either enhancing or reducing SA. Moreover, the relationships among variables depicted in this model demonstrate spatial heterogeneity, demonstrating that in urban open spaces with complex constructures, local spatial models may be needed for soundscape assessment. The research reaffirms the significance of SA in urban open spaces. In terms of practical implications for urban and landscape planners, when sound sources cannot be controlled or altered, coordinating between the sound and the surrounding environment through landscape optimisation could also improve the quality of the soundscape through enhancing SA and help achieve the goal of creating healthy urban open spaces. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.00667v2-abstract-full').style.display = 'none'; document.getElementById('2410.00667v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by Journal of Environmental Management</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.00367">arXiv:2410.00367</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.00367">pdf</a>, <a href="https://arxiv.org/format/2410.00367">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> ROK Defense M&amp;S in the Age of Hyperscale AI: Concepts, Challenges, and Future Directions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lee%2C+Y">Youngjoon Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Park%2C+T">Taehyun Park</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+Y">Yeongjoon Kang</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+J">Jonghoe Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+J">Joonhyuk Kang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.00367v1-abstract-short" style="display: inline;"> Integrating hyperscale AI into national defense modeling and simulation (M&amp;S) is crucial for enhancing strategic and operational capabilities. We explore how hyperscale AI can revolutionize defense M\&amp;S by providing unprecedented accuracy, speed, and the ability to simulate complex scenarios. Countries such as the United States and China are at the forefront of adopting these technologies and are&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.00367v1-abstract-full').style.display = 'inline'; document.getElementById('2410.00367v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.00367v1-abstract-full" style="display: none;"> Integrating hyperscale AI into national defense modeling and simulation (M&amp;S) is crucial for enhancing strategic and operational capabilities. We explore how hyperscale AI can revolutionize defense M\&amp;S by providing unprecedented accuracy, speed, and the ability to simulate complex scenarios. Countries such as the United States and China are at the forefront of adopting these technologies and are experiencing varying degrees of success. Maximizing the potential of hyperscale AI necessitates addressing critical challenges, such as closed networks, long-tail data, complex decision-making, and a shortage of experts. Future directions emphasize the adoption of domestic foundation models, the investment in various GPUs / NPUs, the utilization of big tech services, and the use of open source software. These initiatives will enhance national security, maintain competitive advantages, and promote broader technological and economic progress. With this blueprint, the Republic of Korea can strengthen its defense capabilities and stay ahead of the emerging threats of modern warfare. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.00367v1-abstract-full').style.display = 'none'; document.getElementById('2410.00367v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.17506">arXiv:2409.17506</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.17506">pdf</a>, <a href="https://arxiv.org/format/2409.17506">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> Optimizing Resource Allocation for Multi-modal Semantic Communication in Mobile AIGC Networks: A Diffusion-based Game Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Liu%2C+J">Jian Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Xiao%2C+M">Ming Xiao</a>, <a href="/search/cs?searchtype=author&amp;query=Wen%2C+J">Jinbo Wen</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+J">Jiawen Kang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+R">Ruichen Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+T">Tao Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Niyato%2C+D">Dusit Niyato</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+W">Weiting Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Y">Ying Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.17506v1-abstract-short" style="display: inline;"> Mobile Artificial Intelligence-Generated Content (AIGC) networks enable massive users to obtain customized content generation services. However, users still need to download a large number of AIGC outputs from mobile AIGC service providers, which strains communication resources and increases the risk of transmission failures. Fortunately, Semantic Communication (SemCom) can improve transmission ef&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.17506v1-abstract-full').style.display = 'inline'; document.getElementById('2409.17506v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.17506v1-abstract-full" style="display: none;"> Mobile Artificial Intelligence-Generated Content (AIGC) networks enable massive users to obtain customized content generation services. However, users still need to download a large number of AIGC outputs from mobile AIGC service providers, which strains communication resources and increases the risk of transmission failures. Fortunately, Semantic Communication (SemCom) can improve transmission efficiency and reliability through semantic information processing. Moreover, recent advances in Generative Artificial Intelligence (GAI) further enhanced the effectiveness of SemCom through its powerful generative capabilities. However, how to strike a balance between high-quality content generation and the size of semantic information transmitted is a major challenge. In this paper, we propose a Generative Diffusion Model (GDM)-based multi-modal SemCom (GM-SemCom) framework. The framework improves the accuracy of information reconstruction by integrating GDMs and multi-modal semantic information and also adopts a controllable extraction module for efficient and controllable problems of unstable data recovery and slow decoding speed in GAI-enabled SemCom. Then, we introduce a novel metric called Age of Semantic Information (AoSI) based on the concept of Age of Information (AoI) to quantify the freshness of semantic information. To address the resource trading problem within the framework, we propose a Stackelberg game model, which integrates the AoSI with psychological factors to provide a comprehensive measure of user utility. Furthermore, we propose a GDM-based algorithm to solve the game under incomplete information. Compared with the traditional deep reinforcement learning algorithms, numerical results demonstrate that the proposed algorithm converges faster and is closer to the Stackelberg equilibrium. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.17506v1-abstract-full').style.display = 'none'; document.getElementById('2409.17506v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.16322">arXiv:2409.16322</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.16322">pdf</a>, <a href="https://arxiv.org/format/2409.16322">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neurons and Cognition">q-bio.NC</span> </div> </div> <p class="title is-5 mathjax"> Towards Within-Class Variation in Alzheimer&#39;s Disease Detection from Spontaneous Speech </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Kang%2C+J">Jiawen Kang</a>, <a href="/search/cs?searchtype=author&amp;query=Han%2C+D">Dongrui Han</a>, <a href="/search/cs?searchtype=author&amp;query=Meng%2C+L">Lingwei Meng</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+J">Jingyan Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jinchao Li</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+X">Xixin Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Meng%2C+H">Helen Meng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.16322v1-abstract-short" style="display: inline;"> Alzheimer&#39;s Disease (AD) detection has emerged as a promising research area that employs machine learning classification models to distinguish between individuals with AD and those without. Unlike conventional classification tasks, we identify within-class variation as a critical challenge in AD detection: individuals with AD exhibit a spectrum of cognitive impairments. Given that many AD detectio&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.16322v1-abstract-full').style.display = 'inline'; document.getElementById('2409.16322v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.16322v1-abstract-full" style="display: none;"> Alzheimer&#39;s Disease (AD) detection has emerged as a promising research area that employs machine learning classification models to distinguish between individuals with AD and those without. Unlike conventional classification tasks, we identify within-class variation as a critical challenge in AD detection: individuals with AD exhibit a spectrum of cognitive impairments. Given that many AD detection tasks lack fine-grained labels, simplistic binary classification may overlook two crucial aspects: within-class differences and instance-level imbalance. The former compels the model to map AD samples with varying degrees of impairment to a single diagnostic label, disregarding certain changes in cognitive function. While the latter biases the model towards overrepresented severity levels. This work presents early efforts to address these challenges. We propose two novel methods: Soft Target Distillation (SoTD) and Instance-level Re-balancing (InRe), targeting two problems respectively. Experiments on the ADReSS and ADReSSo datasets demonstrate that the proposed methods significantly improve detection accuracy. Further analysis reveals that SoTD effectively harnesses the strengths of multiple component models, while InRe substantially alleviates model over-fitting. These findings provide insights for developing more robust and reliable AD detection models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.16322v1-abstract-full').style.display = 'none'; document.getElementById('2409.16322v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.15779">arXiv:2409.15779</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.15779">pdf</a>, <a href="https://arxiv.org/format/2409.15779">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> A Robust, Task-Agnostic and Fully-Scalable Voxel Mapping System for Large Scale Environments </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=La%2C+J">Jinche La</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+J">Jun-Gill Kang</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+D">Dasol Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.15779v1-abstract-short" style="display: inline;"> Perception still remains a challenging problem for autonomous navigation in unknown environment, especially for aerial vehicles. Most mapping algorithms for autonomous navigation are specifically designed for their very intended task, which hinders extended usage or cooperative task. In this paper, we propose a voxel mapping system that can build an adaptable map for multiple tasks. The system emp&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.15779v1-abstract-full').style.display = 'inline'; document.getElementById('2409.15779v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.15779v1-abstract-full" style="display: none;"> Perception still remains a challenging problem for autonomous navigation in unknown environment, especially for aerial vehicles. Most mapping algorithms for autonomous navigation are specifically designed for their very intended task, which hinders extended usage or cooperative task. In this paper, we propose a voxel mapping system that can build an adaptable map for multiple tasks. The system employs hash table-based map structure and manages each voxel with spatial and temporal priorities without explicit map boundary. We also introduce an efficient map-sharing feature with minimal bandwidth to enable multi-agent applications. We tested the system in real world and simulation environment by applying it for various tasks including local mapping, global mapping, cooperative multi-agent navigation, and high-speed navigation. Our system proved its capability to build customizable map with high resolution, wide coverage, and real-time performance regardless of sensor and environment. The system can build a full-resolution map using the map-sharing feature, with over 95 % of bandwidth reduction from raw sensor data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.15779v1-abstract-full').style.display = 'none'; document.getElementById('2409.15779v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 6 figures, 3 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.15695">arXiv:2409.15695</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.15695">pdf</a>, <a href="https://arxiv.org/format/2409.15695">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> Toward Mixture-of-Experts Enabled Trustworthy Semantic Communication for 6G Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=He%2C+J">Jiayi He</a>, <a href="/search/cs?searchtype=author&amp;query=Luo%2C+X">Xiaofeng Luo</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+J">Jiawen Kang</a>, <a href="/search/cs?searchtype=author&amp;query=Du%2C+H">Hongyang Du</a>, <a href="/search/cs?searchtype=author&amp;query=Xiong%2C+Z">Zehui Xiong</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+C">Ci Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Niyato%2C+D">Dusit Niyato</a>, <a href="/search/cs?searchtype=author&amp;query=Shen%2C+X">Xuemin Shen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.15695v1-abstract-short" style="display: inline;"> Semantic Communication (SemCom) plays a pivotal role in 6G networks, offering a viable solution for future efficient communication. Deep Learning (DL)-based semantic codecs further enhance this efficiency. However, the vulnerability of DL models to security threats, such as adversarial attacks, poses significant challenges for practical applications of SemCom systems. These vulnerabilities enable&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.15695v1-abstract-full').style.display = 'inline'; document.getElementById('2409.15695v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.15695v1-abstract-full" style="display: none;"> Semantic Communication (SemCom) plays a pivotal role in 6G networks, offering a viable solution for future efficient communication. Deep Learning (DL)-based semantic codecs further enhance this efficiency. However, the vulnerability of DL models to security threats, such as adversarial attacks, poses significant challenges for practical applications of SemCom systems. These vulnerabilities enable attackers to tamper with messages and eavesdrop on private information, especially in wireless communication scenarios. Although existing defenses attempt to address specific threats, they often fail to simultaneously handle multiple heterogeneous attacks. To overcome this limitation, we introduce a novel Mixture-of-Experts (MoE)-based SemCom system. This system comprises a gating network and multiple experts, each specializing in different security challenges. The gating network adaptively selects suitable experts to counter heterogeneous attacks based on user-defined security requirements. Multiple experts collaborate to accomplish semantic communication tasks while meeting the security requirements of users. A case study in vehicular networks demonstrates the efficacy of the MoE-based SemCom system. Simulation results show that the proposed MoE-based SemCom system effectively mitigates concurrent heterogeneous attacks, with minimal impact on downstream task accuracy. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.15695v1-abstract-full').style.display = 'none'; document.getElementById('2409.15695v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 3 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.15371">arXiv:2409.15371</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.15371">pdf</a>, <a href="https://arxiv.org/format/2409.15371">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Bone: Block Affine Transformation as Parameter Efficient Fine-tuning Methods for Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Kang%2C+J">Jiale Kang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.15371v3-abstract-short" style="display: inline;"> Low-Rank Adaptation (LoRA) has achieved remarkable training results by freezing the original weights and training only low-rank matrices, establishing itself as the predominant fine-tuning method for LLMs. In pursuit of performance closer to full-parameter training, a series of LoRA variants have emerged, such as LoRA+, PISSA, Olora, and LoRA-GA. However, these improvements complicate the initial&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.15371v3-abstract-full').style.display = 'inline'; document.getElementById('2409.15371v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.15371v3-abstract-full" style="display: none;"> Low-Rank Adaptation (LoRA) has achieved remarkable training results by freezing the original weights and training only low-rank matrices, establishing itself as the predominant fine-tuning method for LLMs. In pursuit of performance closer to full-parameter training, a series of LoRA variants have emerged, such as LoRA+, PISSA, Olora, and LoRA-GA. However, these improvements complicate the initial setup of model training and increase initialization time. More importantly, they overlook the internal interactions of the original weight information. To address these issues, we introduce a novel theory, ``Weight Guide&#39;&#39; aimed at continuously guiding trainable matrices through the original weights during training to enhance the utilization of weight information. Based on this theory, we designed a new PEFT technique called Bone (\textbf{B}l\textbf{o}ck Affi\textbf{ne}), which not only enhances the utilization of original weight information but also emphasizes the internal connections between weights, leading to faster convergence and better data fitting. Experimental comparisons across two different LLM architectures (LLaMA2, RWKV6) and various parameter scales demonstrate that the Bone structure can achieve rapid convergence and superior data fitting without the need for complex initialization. For example, when fine-tuning LLaMA2-7B on the MetaMathQA dataset and validating on GSM8k and math benchmarks, Bone achieved fine-tuning scores of 49.36 and 8.8, respectively, outperforming PISSA by 5.84\% and 1.96\%. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.15371v3-abstract-full').style.display = 'none'; document.getElementById('2409.15371v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.12992">arXiv:2409.12992</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.12992">pdf</a>, <a href="https://arxiv.org/format/2409.12992">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> DiffEditor: Enhancing Speech Editing with Semantic Enrichment and Acoustic Consistency </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Y">Yang Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Jia%2C+Y">Yuhang Jia</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+S">Shiwan Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Jiang%2C+Z">Ziyue Jiang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Haoran Li</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+J">Jiarong Kang</a>, <a href="/search/cs?searchtype=author&amp;query=Qin%2C+Y">Yong Qin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.12992v1-abstract-short" style="display: inline;"> As text-based speech editing becomes increasingly prevalent, the demand for unrestricted free-text editing continues to grow. However, existing speech editing techniques encounter significant challenges, particularly in maintaining intelligibility and acoustic consistency when dealing with out-of-domain (OOD) text. In this paper, we introduce, DiffEditor, a novel speech editing model designed to e&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.12992v1-abstract-full').style.display = 'inline'; document.getElementById('2409.12992v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.12992v1-abstract-full" style="display: none;"> As text-based speech editing becomes increasingly prevalent, the demand for unrestricted free-text editing continues to grow. However, existing speech editing techniques encounter significant challenges, particularly in maintaining intelligibility and acoustic consistency when dealing with out-of-domain (OOD) text. In this paper, we introduce, DiffEditor, a novel speech editing model designed to enhance performance in OOD text scenarios through semantic enrichment and acoustic consistency. To improve the intelligibility of the edited speech, we enrich the semantic information of phoneme embeddings by integrating word embeddings extracted from a pretrained language model. Furthermore, we emphasize that interframe smoothing properties are critical for modeling acoustic consistency, and thus we propose a first-order loss function to promote smoother transitions at editing boundaries and enhance the overall fluency of the edited speech. Experimental results demonstrate that our model achieves state-of-the-art performance in both in-domain and OOD text scenarios. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.12992v1-abstract-full').style.display = 'none'; document.getElementById('2409.12992v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.12388">arXiv:2409.12388</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.12388">pdf</a>, <a href="https://arxiv.org/format/2409.12388">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> Disentangling Speakers in Multi-Talker Speech Recognition with Speaker-Aware CTC </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Kang%2C+J">Jiawen Kang</a>, <a href="/search/cs?searchtype=author&amp;query=Meng%2C+L">Lingwei Meng</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+M">Mingyu Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Yuejiao Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+X">Xixin Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+X">Xunying Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Meng%2C+H">Helen Meng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.12388v1-abstract-short" style="display: inline;"> Multi-talker speech recognition (MTASR) faces unique challenges in disentangling and transcribing overlapping speech. To address these challenges, this paper investigates the role of Connectionist Temporal Classification (CTC) in speaker disentanglement when incorporated with Serialized Output Training (SOT) for MTASR. Our visualization reveals that CTC guides the encoder to represent different sp&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.12388v1-abstract-full').style.display = 'inline'; document.getElementById('2409.12388v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.12388v1-abstract-full" style="display: none;"> Multi-talker speech recognition (MTASR) faces unique challenges in disentangling and transcribing overlapping speech. To address these challenges, this paper investigates the role of Connectionist Temporal Classification (CTC) in speaker disentanglement when incorporated with Serialized Output Training (SOT) for MTASR. Our visualization reveals that CTC guides the encoder to represent different speakers in distinct temporal regions of acoustic embeddings. Leveraging this insight, we propose a novel Speaker-Aware CTC (SACTC) training objective, based on the Bayes risk CTC framework. SACTC is a tailored CTC variant for multi-talker scenarios, it explicitly models speaker disentanglement by constraining the encoder to represent different speakers&#39; tokens at specific time frames. When integrated with SOT, the SOT-SACTC model consistently outperforms standard SOT-CTC across various degrees of speech overlap. Specifically, we observe relative word error rate reductions of 10% overall and 15% on low-overlap speech. This work represents an initial exploration of CTC-based enhancements for MTASR tasks, offering a new perspective on speaker disentanglement in multi-talker speech recognition. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.12388v1-abstract-full').style.display = 'none'; document.getElementById('2409.12388v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.11653">arXiv:2409.11653</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.11653">pdf</a>, <a href="https://arxiv.org/format/2409.11653">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Enhancing Semi-Supervised Learning via Representative and Diverse Sample Selection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Shao%2C+Q">Qian Shao</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+J">Jiangrui Kang</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Q">Qiyuan Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Z">Zepeng Li</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+H">Hongxia Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Cao%2C+Y">Yiwen Cao</a>, <a href="/search/cs?searchtype=author&amp;query=Liang%2C+J">Jiajuan Liang</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+J">Jian Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.11653v2-abstract-short" style="display: inline;"> Semi-Supervised Learning (SSL) has become a preferred paradigm in many deep learning tasks, which reduces the need for human labor. Previous studies primarily focus on effectively utilising the labelled and unlabeled data to improve performance. However, we observe that how to select samples for labelling also significantly impacts performance, particularly under extremely low-budget settings. The&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.11653v2-abstract-full').style.display = 'inline'; document.getElementById('2409.11653v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.11653v2-abstract-full" style="display: none;"> Semi-Supervised Learning (SSL) has become a preferred paradigm in many deep learning tasks, which reduces the need for human labor. Previous studies primarily focus on effectively utilising the labelled and unlabeled data to improve performance. However, we observe that how to select samples for labelling also significantly impacts performance, particularly under extremely low-budget settings. The sample selection task in SSL has been under-explored for a long time. To fill in this gap, we propose a Representative and Diverse Sample Selection approach (RDSS). By adopting a modified Frank-Wolfe algorithm to minimise a novel criterion $伪$-Maximum Mean Discrepancy ($伪$-MMD), RDSS samples a representative and diverse subset for annotation from the unlabeled data. We demonstrate that minimizing $伪$-MMD enhances the generalization ability of low-budget learning. Experimental results show that RDSS consistently improves the performance of several popular SSL frameworks and outperforms the state-of-the-art sample selection approaches used in Active Learning (AL) and Semi-Supervised Active Learning (SSAL), even with constrained annotation budgets. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.11653v2-abstract-full').style.display = 'none'; document.getElementById('2409.11653v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NeurIPS 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.10587">arXiv:2409.10587</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.10587">pdf</a>, <a href="https://arxiv.org/format/2409.10587">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> SoccerNet 2024 Challenges Results </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cioppa%2C+A">Anthony Cioppa</a>, <a href="/search/cs?searchtype=author&amp;query=Giancola%2C+S">Silvio Giancola</a>, <a href="/search/cs?searchtype=author&amp;query=Somers%2C+V">Vladimir Somers</a>, <a href="/search/cs?searchtype=author&amp;query=Joos%2C+V">Victor Joos</a>, <a href="/search/cs?searchtype=author&amp;query=Magera%2C+F">Floriane Magera</a>, <a href="/search/cs?searchtype=author&amp;query=Held%2C+J">Jan Held</a>, <a href="/search/cs?searchtype=author&amp;query=Ghasemzadeh%2C+S+A">Seyed Abolfazl Ghasemzadeh</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xin Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Seweryn%2C+K">Karolina Seweryn</a>, <a href="/search/cs?searchtype=author&amp;query=Kowalczyk%2C+M">Mateusz Kowalczyk</a>, <a href="/search/cs?searchtype=author&amp;query=Mr%C3%B3z%2C+Z">Zuzanna Mr贸z</a>, <a href="/search/cs?searchtype=author&amp;query=%C5%81ukasik%2C+S">Szymon 艁ukasik</a>, <a href="/search/cs?searchtype=author&amp;query=Ha%C5%82o%C5%84%2C+M">Micha艂 Ha艂o艅</a>, <a href="/search/cs?searchtype=author&amp;query=Mkhallati%2C+H">Hassan Mkhallati</a>, <a href="/search/cs?searchtype=author&amp;query=Deli%C3%A8ge%2C+A">Adrien Deli猫ge</a>, <a href="/search/cs?searchtype=author&amp;query=Hinojosa%2C+C">Carlos Hinojosa</a>, <a href="/search/cs?searchtype=author&amp;query=Sanchez%2C+K">Karen Sanchez</a>, <a href="/search/cs?searchtype=author&amp;query=Mansourian%2C+A+M">Amir M. Mansourian</a>, <a href="/search/cs?searchtype=author&amp;query=Miralles%2C+P">Pierre Miralles</a>, <a href="/search/cs?searchtype=author&amp;query=Barnich%2C+O">Olivier Barnich</a>, <a href="/search/cs?searchtype=author&amp;query=De+Vleeschouwer%2C+C">Christophe De Vleeschouwer</a>, <a href="/search/cs?searchtype=author&amp;query=Alahi%2C+A">Alexandre Alahi</a>, <a href="/search/cs?searchtype=author&amp;query=Ghanem%2C+B">Bernard Ghanem</a>, <a href="/search/cs?searchtype=author&amp;query=Van+Droogenbroeck%2C+M">Marc Van Droogenbroeck</a>, <a href="/search/cs?searchtype=author&amp;query=Gorski%2C+A">Adam Gorski</a> , et al. (59 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.10587v1-abstract-short" style="display: inline;"> The SoccerNet 2024 challenges represent the fourth annual video understanding challenges organized by the SoccerNet team. These challenges aim to advance research across multiple themes in football, including broadcast video understanding, field understanding, and player understanding. This year, the challenges encompass four vision-based tasks. (1) Ball Action Spotting, focusing on precisely loca&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.10587v1-abstract-full').style.display = 'inline'; document.getElementById('2409.10587v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.10587v1-abstract-full" style="display: none;"> The SoccerNet 2024 challenges represent the fourth annual video understanding challenges organized by the SoccerNet team. These challenges aim to advance research across multiple themes in football, including broadcast video understanding, field understanding, and player understanding. This year, the challenges encompass four vision-based tasks. (1) Ball Action Spotting, focusing on precisely localizing when and which soccer actions related to the ball occur, (2) Dense Video Captioning, focusing on describing the broadcast with natural language and anchored timestamps, (3) Multi-View Foul Recognition, a novel task focusing on analyzing multiple viewpoints of a potential foul incident to classify whether a foul occurred and assess its severity, (4) Game State Reconstruction, another novel task focusing on reconstructing the game state from broadcast videos onto a 2D top-view map of the field. Detailed information about the tasks, challenges, and leaderboards can be found at https://www.soccer-net.org, with baselines and development kits available at https://github.com/SoccerNet. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.10587v1-abstract-full').style.display = 'none'; document.getElementById('2409.10587v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">7 pages, 1 figure</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.10072">arXiv:2409.10072</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.10072">pdf</a>, <a href="https://arxiv.org/format/2409.10072">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Speaker Contrastive Learning for Source Speaker Tracing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Q">Qing Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+H">Hongmei Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+J">Jian Kang</a>, <a href="/search/cs?searchtype=author&amp;query=Du%2C+M">Mengjie Du</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jie Li</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+X">Xiao-Lei Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Xie%2C+L">Lei Xie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.10072v1-abstract-short" style="display: inline;"> As a form of biometric authentication technology, the security of speaker verification systems is of utmost importance. However, SV systems are inherently vulnerable to various types of attacks that can compromise their accuracy and reliability. One such attack is voice conversion, which modifies a persons speech to sound like another person by altering various vocal characteristics. This poses a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.10072v1-abstract-full').style.display = 'inline'; document.getElementById('2409.10072v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.10072v1-abstract-full" style="display: none;"> As a form of biometric authentication technology, the security of speaker verification systems is of utmost importance. However, SV systems are inherently vulnerable to various types of attacks that can compromise their accuracy and reliability. One such attack is voice conversion, which modifies a persons speech to sound like another person by altering various vocal characteristics. This poses a significant threat to SV systems. To address this challenge, the Source Speaker Tracing Challenge in IEEE SLT2024 aims to identify the source speaker information in manipulated speech signals. Specifically, SSTC focuses on source speaker verification against voice conversion to determine whether two converted speech samples originate from the same source speaker. In this study, we propose a speaker contrastive learning-based approach for source speaker tracing to learn the latent source speaker information in converted speech. To learn a more source-speaker-related representation, we employ speaker contrastive loss during the training of the embedding extractor. This speaker contrastive loss helps identify the true source speaker embedding among several distractor speaker embeddings, enabling the embedding extractor to learn the potentially possessing source speaker information present in the converted speech. Experiments demonstrate that our proposed speaker contrastive learning system achieves the lowest EER of 16.788% on the challenge test set, securing first place in the challenge. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.10072v1-abstract-full').style.display = 'none'; document.getElementById('2409.10072v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">7 pages, 2 figures, accepted by SLT</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.09343">arXiv:2409.09343</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.09343">pdf</a>, <a href="https://arxiv.org/format/2409.09343">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> Generative AI in Data Center Networking: Fundamentals, Perspectives, and Case Study </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Y">Yinqiu Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Du%2C+H">Hongyang Du</a>, <a href="/search/cs?searchtype=author&amp;query=Niyato%2C+D">Dusit Niyato</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+J">Jiawen Kang</a>, <a href="/search/cs?searchtype=author&amp;query=Xiong%2C+Z">Zehui Xiong</a>, <a href="/search/cs?searchtype=author&amp;query=Wen%2C+Y">Yonggang Wen</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+D+I">Dong In Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.09343v1-abstract-short" style="display: inline;"> Generative AI (GenAI), exemplified by Large Language Models (LLMs) such as OpenAI&#39;s ChatGPT, is revolutionizing various fields. Central to this transformation is Data Center Networking (DCN), which not only provides the computational power necessary for GenAI training and inference but also delivers GenAI-driven services to users. This article examines an interplay between GenAI and DCNs, highligh&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.09343v1-abstract-full').style.display = 'inline'; document.getElementById('2409.09343v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.09343v1-abstract-full" style="display: none;"> Generative AI (GenAI), exemplified by Large Language Models (LLMs) such as OpenAI&#39;s ChatGPT, is revolutionizing various fields. Central to this transformation is Data Center Networking (DCN), which not only provides the computational power necessary for GenAI training and inference but also delivers GenAI-driven services to users. This article examines an interplay between GenAI and DCNs, highlighting their symbiotic relationship and mutual advancements. We begin by reviewing current challenges within DCNs and discuss how GenAI contributes to enhancing DCN capabilities through innovations, such as data augmentation, process automation, and domain transfer. We then focus on analyzing the distinctive characteristics of GenAI workloads on DCNs, gaining insights that catalyze the evolution of DCNs to more effectively support GenAI and LLMs. Moreover, to illustrate the seamless integration of GenAI with DCNs, we present a case study on full-lifecycle DCN digital twins. In this study, we employ LLMs equipped with Retrieval Augmented Generation (RAG) to formulate optimization problems for DCNs and adopt Diffusion-Deep Reinforcement Learning (DRL) for optimizing the RAG knowledge placement strategy. This approach not only demonstrates the application of advanced GenAI methods within DCNs but also positions the digital twin as a pivotal GenAI service operating on DCNs. We anticipate that this article can promote further research into enhancing the virtuous interaction between GenAI and DCNs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.09343v1-abstract-full').style.display = 'none'; document.getElementById('2409.09343v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.08797">arXiv:2409.08797</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.08797">pdf</a>, <a href="https://arxiv.org/format/2409.08797">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Exploring SSL Discrete Speech Features for Zipformer-based Contextual ASR </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cui%2C+M">Mingyu Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+Y">Yifan Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Deng%2C+J">Jiajun Deng</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+J">Jiawen Kang</a>, <a href="/search/cs?searchtype=author&amp;query=Hu%2C+S">Shujie Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+T">Tianzi Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Z">Zhaoqing Li</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+S">Shiliang Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+X">Xie Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+X">Xunying Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.08797v1-abstract-short" style="display: inline;"> Self-supervised learning (SSL) based discrete speech representations are highly compact and domain adaptable. In this paper, SSL discrete speech features extracted from WavLM models are used as additional cross-utterance acoustic context features in Zipformer-Transducer ASR systems. The efficacy of replacing Fbank features with discrete token features for modelling either cross-utterance contexts&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.08797v1-abstract-full').style.display = 'inline'; document.getElementById('2409.08797v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.08797v1-abstract-full" style="display: none;"> Self-supervised learning (SSL) based discrete speech representations are highly compact and domain adaptable. In this paper, SSL discrete speech features extracted from WavLM models are used as additional cross-utterance acoustic context features in Zipformer-Transducer ASR systems. The efficacy of replacing Fbank features with discrete token features for modelling either cross-utterance contexts (from preceding and future segments), or current utterance&#39;s internal contexts alone, or both at the same time, are demonstrated thoroughly on the Gigaspeech 1000-hr corpus. The best Zipformer-Transducer system using discrete tokens based cross-utterance context features outperforms the baseline using utterance internal context only with statistically significant word error rate (WER) reductions of 0.32% to 0.41% absolute (2.78% to 3.54% relative) on the dev and test data. The lowest published WER of 11.15% and 11.14% were obtained on the dev and test sets. Our work is open-source and publicly available at https://github.com/open-creator/icefall/tree/master/egs/gigaspeech/Context\_ASR. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.08797v1-abstract-full').style.display = 'none'; document.getElementById('2409.08797v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Submitted to ICASSP 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.08596">arXiv:2409.08596</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.08596">pdf</a>, <a href="https://arxiv.org/format/2409.08596">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Large Language Model Can Transcribe Speech in Multi-Talker Scenarios with Versatile Instructions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Meng%2C+L">Lingwei Meng</a>, <a href="/search/cs?searchtype=author&amp;query=Hu%2C+S">Shujie Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+J">Jiawen Kang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Z">Zhaoqing Li</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Yuejiao Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+W">Wenxuan Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+X">Xixin Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+X">Xunying Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Meng%2C+H">Helen Meng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.08596v1-abstract-short" style="display: inline;"> Recent advancements in large language models (LLMs) have revolutionized various domains, bringing significant progress and new opportunities. Despite progress in speech-related tasks, LLMs have not been sufficiently explored in multi-talker scenarios. In this work, we present a pioneering effort to investigate the capability of LLMs in transcribing speech in multi-talker environments, following ve&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.08596v1-abstract-full').style.display = 'inline'; document.getElementById('2409.08596v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.08596v1-abstract-full" style="display: none;"> Recent advancements in large language models (LLMs) have revolutionized various domains, bringing significant progress and new opportunities. Despite progress in speech-related tasks, LLMs have not been sufficiently explored in multi-talker scenarios. In this work, we present a pioneering effort to investigate the capability of LLMs in transcribing speech in multi-talker environments, following versatile instructions related to multi-talker automatic speech recognition (ASR), target talker ASR, and ASR based on specific talker attributes such as sex, occurrence order, language, and keyword spoken. Our approach utilizes WavLM and Whisper encoder to extract multi-faceted speech representations that are sensitive to speaker characteristics and semantic context. These representations are then fed into an LLM fine-tuned using LoRA, enabling the capabilities for speech comprehension and transcription. Comprehensive experiments reveal the promising performance of our proposed system, MT-LLM, in cocktail party scenarios, highlighting the potential of LLM to handle speech-related tasks based on user instructions in such complex settings. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.08596v1-abstract-full').style.display = 'none'; document.getElementById('2409.08596v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.07706">arXiv:2409.07706</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.07706">pdf</a>, <a href="https://arxiv.org/format/2409.07706">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Attack End-to-End Autonomous Driving through Module-Wise Noise </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wang%2C+L">Lu Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+T">Tianyuan Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Han%2C+Y">Yikai Han</a>, <a href="/search/cs?searchtype=author&amp;query=Fang%2C+M">Muyang Fang</a>, <a href="/search/cs?searchtype=author&amp;query=Jin%2C+T">Ting Jin</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+J">Jiaqi Kang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.07706v1-abstract-short" style="display: inline;"> With recent breakthroughs in deep neural networks, numerous tasks within autonomous driving have exhibited remarkable performance. However, deep learning models are susceptible to adversarial attacks, presenting significant security risks to autonomous driving systems. Presently, end-to-end architectures have emerged as the predominant solution for autonomous driving, owing to their collaborative&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.07706v1-abstract-full').style.display = 'inline'; document.getElementById('2409.07706v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.07706v1-abstract-full" style="display: none;"> With recent breakthroughs in deep neural networks, numerous tasks within autonomous driving have exhibited remarkable performance. However, deep learning models are susceptible to adversarial attacks, presenting significant security risks to autonomous driving systems. Presently, end-to-end architectures have emerged as the predominant solution for autonomous driving, owing to their collaborative nature across different tasks. Yet, the implications of adversarial attacks on such models remain relatively unexplored. In this paper, we conduct comprehensive adversarial security research on the modular end-to-end autonomous driving model for the first time. We thoroughly consider the potential vulnerabilities in the model inference process and design a universal attack scheme through module-wise noise injection. We conduct large-scale experiments on the full-stack autonomous driving model and demonstrate that our attack method outperforms previous attack methods. We trust that our research will offer fresh insights into ensuring the safety and reliability of autonomous driving systems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.07706v1-abstract-full').style.display = 'none'; document.getElementById('2409.07706v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.07321">arXiv:2409.07321</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.07321">pdf</a>, <a href="https://arxiv.org/format/2409.07321">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Module-wise Adaptive Adversarial Training for End-to-end Autonomous Driving </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+T">Tianyuan Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+L">Lu Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+J">Jiaqi Kang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+X">Xinwei Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Liang%2C+S">Siyuan Liang</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Y">Yuwei Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+A">Aishan Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+X">Xianglong Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.07321v1-abstract-short" style="display: inline;"> Recent advances in deep learning have markedly improved autonomous driving (AD) models, particularly end-to-end systems that integrate perception, prediction, and planning stages, achieving state-of-the-art performance. However, these models remain vulnerable to adversarial attacks, where human-imperceptible perturbations can disrupt decision-making processes. While adversarial training is an effe&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.07321v1-abstract-full').style.display = 'inline'; document.getElementById('2409.07321v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.07321v1-abstract-full" style="display: none;"> Recent advances in deep learning have markedly improved autonomous driving (AD) models, particularly end-to-end systems that integrate perception, prediction, and planning stages, achieving state-of-the-art performance. However, these models remain vulnerable to adversarial attacks, where human-imperceptible perturbations can disrupt decision-making processes. While adversarial training is an effective method for enhancing model robustness against such attacks, no prior studies have focused on its application to end-to-end AD models. In this paper, we take the first step in adversarial training for end-to-end AD models and present a novel Module-wise Adaptive Adversarial Training (MA2T). However, extending conventional adversarial training to this context is highly non-trivial, as different stages within the model have distinct objectives and are strongly interconnected. To address these challenges, MA2T first introduces Module-wise Noise Injection, which injects noise before the input of different modules, targeting training models with the guidance of overall objectives rather than each independent module loss. Additionally, we introduce Dynamic Weight Accumulation Adaptation, which incorporates accumulated weight changes to adaptively learn and adjust the loss weights of each module based on their contributions (accumulated reduction rates) for better balance and robust training. To demonstrate the efficacy of our defense, we conduct extensive experiments on the widely-used nuScenes dataset across several end-to-end AD models under both white-box and black-box attacks, where our method outperforms other baselines by large margins (+5-10%). Moreover, we validate the robustness of our defense through closed-loop evaluation in the CARLA simulation environment, showing improved resilience even against natural corruption. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.07321v1-abstract-full').style.display = 'none'; document.getElementById('2409.07321v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.05576">arXiv:2409.05576</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.05576">pdf</a>, <a href="https://arxiv.org/format/2409.05576">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> JavaVFC: Java Vulnerability Fixing Commits from Open-source Software </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Bui%2C+T">Tan Bui</a>, <a href="/search/cs?searchtype=author&amp;query=Tun%2C+Y+N">Yan Naing Tun</a>, <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+Y">Yiran Cheng</a>, <a href="/search/cs?searchtype=author&amp;query=Irsan%2C+I+C">Ivana Clairine Irsan</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+T">Ting Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+H+J">Hong Jin Kang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.05576v1-abstract-short" style="display: inline;"> We present a comprehensive dataset of Java vulnerability-fixing commits (VFCs) to advance research in Java vulnerability analysis. Our dataset, derived from thousands of open-source Java projects on GitHub, comprises two variants: JavaVFC and JavaVFC-extended. The dataset was constructed through a rigorous process involving heuristic rules and multiple rounds of manual labeling. We initially used&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.05576v1-abstract-full').style.display = 'inline'; document.getElementById('2409.05576v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.05576v1-abstract-full" style="display: none;"> We present a comprehensive dataset of Java vulnerability-fixing commits (VFCs) to advance research in Java vulnerability analysis. Our dataset, derived from thousands of open-source Java projects on GitHub, comprises two variants: JavaVFC and JavaVFC-extended. The dataset was constructed through a rigorous process involving heuristic rules and multiple rounds of manual labeling. We initially used keywords to filter candidate VFCs based on commit messages, then refined this keyword set through iterative manual labeling. The final labeling round achieved a precision score of 0.7 among three annotators. We applied the refined keyword set to 34,321 open-source Java repositories with over 50 GitHub stars, resulting in JavaVFC with 784 manually verified VFCs and JavaVFC-extended with 16,837 automatically identified VFCs. Both variants are presented in a standardized JSONL format for easy access and analysis. This dataset supports various research endeavors, including VFC identification, fine-grained vulnerability detection, and automated vulnerability repair. The JavaVFC and JavaVFC-extended are publicly available at https://zenodo.org/records/13731781. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.05576v1-abstract-full').style.display = 'none'; document.getElementById('2409.05576v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.05484">arXiv:2409.05484</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.05484">pdf</a>, <a href="https://arxiv.org/format/2409.05484">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> CRADLE-VAE: Enhancing Single-Cell Gene Perturbation Modeling with Counterfactual Reasoning-based Artifact Disentanglement </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Baek%2C+S">Seungheun Baek</a>, <a href="/search/cs?searchtype=author&amp;query=Park%2C+S">Soyon Park</a>, <a href="/search/cs?searchtype=author&amp;query=Chok%2C+Y+T">Yan Ting Chok</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+J">Junhyun Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Park%2C+J">Jueon Park</a>, <a href="/search/cs?searchtype=author&amp;query=Gim%2C+M">Mogan Gim</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+J">Jaewoo Kang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.05484v2-abstract-short" style="display: inline;"> Predicting cellular responses to various perturbations is a critical focus in drug discovery and personalized therapeutics, with deep learning models playing a significant role in this endeavor. Single-cell datasets contain technical artifacts that may hinder the predictability of such models, which poses quality control issues highly regarded in this area. To address this, we propose CRADLE-VAE,&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.05484v2-abstract-full').style.display = 'inline'; document.getElementById('2409.05484v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.05484v2-abstract-full" style="display: none;"> Predicting cellular responses to various perturbations is a critical focus in drug discovery and personalized therapeutics, with deep learning models playing a significant role in this endeavor. Single-cell datasets contain technical artifacts that may hinder the predictability of such models, which poses quality control issues highly regarded in this area. To address this, we propose CRADLE-VAE, a causal generative framework tailored for single-cell gene perturbation modeling, enhanced with counterfactual reasoning-based artifact disentanglement. Throughout training, CRADLE-VAE models the underlying latent distribution of technical artifacts and perturbation effects present in single-cell datasets. It employs counterfactual reasoning to effectively disentangle such artifacts by modulating the latent basal spaces and learns robust features for generating cellular response data with improved quality. Experimental results demonstrate that this approach improves not only treatment effect estimation performance but also generative quality as well. The CRADLE-VAE codebase is publicly available at https://github.com/dmis-lab/CRADLE-VAE. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.05484v2-abstract-full').style.display = 'none'; document.getElementById('2409.05484v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.03303">arXiv:2409.03303</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.03303">pdf</a>, <a href="https://arxiv.org/format/2409.03303">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Improving Robustness to Multiple Spurious Correlations by Multi-Objective Optimization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Kim%2C+N">Nayeong Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+J">Juwon Kang</a>, <a href="/search/cs?searchtype=author&amp;query=Ahn%2C+S">Sungsoo Ahn</a>, <a href="/search/cs?searchtype=author&amp;query=Ok%2C+J">Jungseul Ok</a>, <a href="/search/cs?searchtype=author&amp;query=Kwak%2C+S">Suha Kwak</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.03303v1-abstract-short" style="display: inline;"> We study the problem of training an unbiased and accurate model given a dataset with multiple biases. This problem is challenging since the multiple biases cause multiple undesirable shortcuts during training, and even worse, mitigating one may exacerbate the other. We propose a novel training method to tackle this challenge. Our method first groups training data so that different groups induce di&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.03303v1-abstract-full').style.display = 'inline'; document.getElementById('2409.03303v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.03303v1-abstract-full" style="display: none;"> We study the problem of training an unbiased and accurate model given a dataset with multiple biases. This problem is challenging since the multiple biases cause multiple undesirable shortcuts during training, and even worse, mitigating one may exacerbate the other. We propose a novel training method to tackle this challenge. Our method first groups training data so that different groups induce different shortcuts, and then optimizes a linear combination of group-wise losses while adjusting their weights dynamically to alleviate conflicts between the groups in performance; this approach, rooted in the multi-objective optimization theory, encourages to achieve the minimax Pareto solution. We also present a new benchmark with multiple biases, dubbed MultiCelebA, for evaluating debiased training methods under realistic and challenging scenarios. Our method achieved the best on three datasets with multiple biases, and also showed superior performance on conventional single-bias datasets. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.03303v1-abstract-full').style.display = 'none'; document.getElementById('2409.03303v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">International Conference on Machine Learning 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.01604">arXiv:2409.01604</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.01604">pdf</a>, <a href="https://arxiv.org/format/2409.01604">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> DAPONet: A Dual Attention and Partially Overparameterized Network for Real-Time Road Damage Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Pan%2C+W">Weichao Pan</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+J">Jiaju Kang</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+X">Xu Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Z">Zhihao Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Ge%2C+Y">Yiyuan Ge</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.01604v1-abstract-short" style="display: inline;"> Current road damage detection methods, relying on manual inspections or sensor-mounted vehicles, are inefficient, limited in coverage, and often inaccurate, especially for minor damages, leading to delays and safety hazards. To address these issues and enhance real-time road damage detection using street view image data (SVRDD), we propose DAPONet, a model incorporating three key modules: a dual a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.01604v1-abstract-full').style.display = 'inline'; document.getElementById('2409.01604v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.01604v1-abstract-full" style="display: none;"> Current road damage detection methods, relying on manual inspections or sensor-mounted vehicles, are inefficient, limited in coverage, and often inaccurate, especially for minor damages, leading to delays and safety hazards. To address these issues and enhance real-time road damage detection using street view image data (SVRDD), we propose DAPONet, a model incorporating three key modules: a dual attention mechanism combining global and local attention, a multi-scale partial over-parameterization module, and an efficient downsampling module. DAPONet achieves a mAP50 of 70.1% on the SVRDD dataset, outperforming YOLOv10n by 10.4%, while reducing parameters to 1.6M and FLOPs to 1.7G, representing reductions of 41% and 80%, respectively. On the MS COCO2017 val dataset, DAPONet achieves an mAP50-95 of 33.4%, 0.8% higher than EfficientDet-D1, with a 74% reduction in both parameters and FLOPs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.01604v1-abstract-full').style.display = 'none'; document.getElementById('2409.01604v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.16493">arXiv:2408.16493</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.16493">pdf</a>, <a href="https://arxiv.org/format/2408.16493">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Learning from Negative Samples in Generative Biomedical Entity Linking </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Kim%2C+C">Chanhwi Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+H">Hyunjae Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Park%2C+S">Sihyeon Park</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+J">Jiwoo Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Sung%2C+M">Mujeen Sung</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+J">Jaewoo Kang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.16493v1-abstract-short" style="display: inline;"> Generative models have become widely used in biomedical entity linking (BioEL) due to their excellent performance and efficient memory usage. However, these models are usually trained only with positive samples--entities that match the input mention&#39;s identifier--and do not explicitly learn from hard negative samples, which are entities that look similar but have different meanings. To address thi&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.16493v1-abstract-full').style.display = 'inline'; document.getElementById('2408.16493v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.16493v1-abstract-full" style="display: none;"> Generative models have become widely used in biomedical entity linking (BioEL) due to their excellent performance and efficient memory usage. However, these models are usually trained only with positive samples--entities that match the input mention&#39;s identifier--and do not explicitly learn from hard negative samples, which are entities that look similar but have different meanings. To address this limitation, we introduce ANGEL (Learning from Negative Samples in Generative Biomedical Entity Linking), the first framework that trains generative BioEL models using negative samples. Specifically, a generative model is initially trained to generate positive samples from the knowledge base for given input entities. Subsequently, both correct and incorrect outputs are gathered from the model&#39;s top-k predictions. The model is then updated to prioritize the correct predictions through direct preference optimization. Our models fine-tuned with ANGEL outperform the previous best baseline models by up to an average top-1 accuracy of 1.4% on five benchmarks. When incorporating our framework into pre-training, the performance improvement further increases to 1.7%, demonstrating its effectiveness in both the pre-training and fine-tuning stages. Our code is available at https://github.com/dmis-lab/ANGEL. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.16493v1-abstract-full').style.display = 'none'; document.getElementById('2408.16493v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.10937">arXiv:2408.10937</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.10937">pdf</a>, <a href="https://arxiv.org/format/2408.10937">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Proxona: Leveraging LLM-Driven Personas to Enhance Creators&#39; Understanding of Their Audience </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Choi%2C+Y">Yoonseo Choi</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+E+J">Eun Jeong Kang</a>, <a href="/search/cs?searchtype=author&amp;query=Choi%2C+S">Seulgi Choi</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+M+K">Min Kyung Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+J">Juho Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.10937v2-abstract-short" style="display: inline;"> Creators are nothing without their audience, and thereby understanding their audience is the cornerstone of their professional achievement. Yet many creators feel lost while comprehending audiences with existing tools, which offer insufficient insights for tailoring content to audience needs. To address the challenges creators face in understanding their audience, we present Proxona, a system for&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.10937v2-abstract-full').style.display = 'inline'; document.getElementById('2408.10937v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.10937v2-abstract-full" style="display: none;"> Creators are nothing without their audience, and thereby understanding their audience is the cornerstone of their professional achievement. Yet many creators feel lost while comprehending audiences with existing tools, which offer insufficient insights for tailoring content to audience needs. To address the challenges creators face in understanding their audience, we present Proxona, a system for defining and extracting representative audience personas from the comments. Creators converse with personas to gain insights into their preferences and engagement, solicit feedback, and implement evidence-based improvements to their content. Powered by large language models, Proxona analyzes audience comments, distilling the latent characteristics of audiences into tangible dimensions (classification categories) and values (category attributes). Proxona then clusters these into synthetic personas. Our technical evaluations demonstrated that our pipelines effectively generated relevant and distinct dimensions and values, enabling the deduction of audience-reflecting personas, while minimizing the likelihood of hallucinations in persona responses. Our user evaluation with 11 creators showed that Proxona supported creators to gain new insights about their audience, make informed decisions, and successfully complete content creation with high confidence. Proxona&#39;s data-driven audience personas empower creators to seamlessly integrate audience perspectives into their creative processes, fostering a collaborative approach to content creation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.10937v2-abstract-full').style.display = 'none'; document.getElementById('2408.10937v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">32 pages (including 14 pages of Appendix); acknowledgment added</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.02954">arXiv:2408.02954</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.02954">pdf</a>, <a href="https://arxiv.org/format/2408.02954">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> WWW: Where, Which and Whatever Enhancing Interpretability in Multimodal Deepfake Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Jung%2C+J">Juho Jung</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+S">Sangyoun Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+J">Jooeon Kang</a>, <a href="/search/cs?searchtype=author&amp;query=Na%2C+Y">Yunjin Na</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.02954v1-abstract-short" style="display: inline;"> All current benchmarks for multimodal deepfake detection manipulate entire frames using various generation techniques, resulting in oversaturated detection accuracies exceeding 94% at the video-level classification. However, these benchmarks struggle to detect dynamic deepfake attacks with challenging frame-by-frame alterations presented in real-world scenarios. To address this limitation, we intr&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.02954v1-abstract-full').style.display = 'inline'; document.getElementById('2408.02954v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.02954v1-abstract-full" style="display: none;"> All current benchmarks for multimodal deepfake detection manipulate entire frames using various generation techniques, resulting in oversaturated detection accuracies exceeding 94% at the video-level classification. However, these benchmarks struggle to detect dynamic deepfake attacks with challenging frame-by-frame alterations presented in real-world scenarios. To address this limitation, we introduce FakeMix, a novel clip-level evaluation benchmark aimed at identifying manipulated segments within both video and audio, providing insight into the origins of deepfakes. Furthermore, we propose novel evaluation metrics, Temporal Accuracy (TA) and Frame-wise Discrimination Metric (FDM), to assess the robustness of deepfake detection models. Evaluating state-of-the-art models against diverse deepfake benchmarks, particularly FakeMix, demonstrates the effectiveness of our approach comprehensively. Specifically, while achieving an Average Precision (AP) of 94.2% at the video-level, the evaluation of the existing models at the clip-level using the proposed metrics, TA and FDM, yielded sharp declines in accuracy to 53.1%, and 52.1%, respectively. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.02954v1-abstract-full').style.display = 'none'; document.getElementById('2408.02954v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">4 pages, 2 figures, 2 tables, Accepted as Oral Presentation at The Trustworthy AI Workshop @ IJCAI 2024</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Kang%2C+J&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Kang%2C+J&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Kang%2C+J&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Kang%2C+J&amp;start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Kang%2C+J&amp;start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Kang%2C+J&amp;start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">&hellip;</span></li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

Pages: 1 2 3 4 5 6 7 8 9 10