
Search | arXiv e-print repository

<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;50 of 1,327 results for author: <span class="mathjax">Xia, Y</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&amp;query=Xia%2C+Y">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Xia, Y"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Xia%2C+Y&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Xia, Y"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Xia%2C+Y&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Xia%2C+Y&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Xia%2C+Y&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Xia%2C+Y&amp;start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Xia%2C+Y&amp;start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Xia%2C+Y&amp;start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">&hellip;</span></li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.01524">arXiv:2412.01524</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.01524">pdf</a>, <a href="https://arxiv.org/format/2412.01524">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> </div> </div> <p class="title is-5 mathjax"> Opinion Dynamic Under Malicious Agent Influence in Multi-Agent Systems: From the Perspective of Opinion Evolution Cost </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Suo%2C+Y">Yuhan Suo</a>, <a href="/search/cs?searchtype=author&amp;query=Chai%2C+R">Runqi Chai</a>, <a href="/search/cs?searchtype=author&amp;query=Chai%2C+S">Senchun Chai</a>, <a href="/search/cs?searchtype=author&amp;query=Farhan%2C+I+M">Ishrak MD Farhan</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+X">Xudong Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Xia%2C+Y">Yuanqing Xia</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.01524v1-abstract-short" style="display: inline;"> In human social systems, debates are often seen as a means to resolve differences of opinion. 
However, in reality, debates frequently incur significant communication costs, especially when dealing with stubborn opponents. Inspired by this phenomenon, this paper examines the impact of malicious agents on the evolution of normal agents&#39; opinions from the perspective of opinion evolution cost, and pr&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.01524v1-abstract-full').style.display = 'inline'; document.getElementById('2412.01524v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.01524v1-abstract-full" style="display: none;"> In human social systems, debates are often seen as a means to resolve differences of opinion. However, in reality, debates frequently incur significant communication costs, especially when dealing with stubborn opponents. Inspired by this phenomenon, this paper examines the impact of malicious agents on the evolution of normal agents&#39; opinions from the perspective of opinion evolution cost, and proposes corresponding solutions for the scenario in which malicious agents hold different opinions in multi-agent systems(MASs). First, the paper analyzes the negative impact of malicious agents on the opinion evolution process, revealing the evolutionary cost they bring, which provides the theoretical foundation for the proposed solution. Next, based on the process of opinion evolution, a strategy is introduced where agents dynamically adjust trust values during the opinion evolution process, gradually isolating malicious agents and achieving this even when malicious agents are in the majority. Additionally, an evolution rate adjustment mechanism is introduced, allowing the system to flexibly regulate the evolution process in complex situations, effectively achieving the trade-off between opinion evolution rate and cost. Extensive numerical simulations demonstrate that the algorithm can effectively isolate the negative influence of malicious agents and achieve a balance between opinion evolution costs and convergence speed. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.01524v1-abstract-full').style.display = 'none'; document.getElementById('2412.01524v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">20 pages, 12 figures, 2 tables. 
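The abstract describes the trust-adjustment strategy only at a high level. As a rough illustration of trust-weighted opinion dynamics in general (a toy sketch, not the paper's algorithm; the exponential trust-decay heuristic and all parameters are invented here):

```python
import numpy as np

def evolve_opinions(x0, malicious, steps=100, kappa=5.0):
    """Toy trust-weighted opinion dynamics.

    x0        : initial opinions, shape (n,)
    malicious : boolean mask of stubborn agents that never update
    kappa     : how fast trust decays with opinion distance
    """
    x = np.asarray(x0, dtype=float).copy()
    n = len(x)
    trust = np.ones((n, n))  # pairwise trust, adapted over time
    for _ in range(steps):
        # Heuristic: trust decays where opinion distance persists,
        # gradually isolating agents that never move toward consensus.
        dist = np.abs(x[:, None] - x[None, :])
        trust *= np.exp(-kappa * dist / steps)
        w = trust / trust.sum(axis=1, keepdims=True)  # row-normalized weights
        x = np.where(malicious, x, w @ x)             # malicious agents stay put
    return x

x0 = np.array([0.1, 0.2, 0.3, 0.9, 0.95])
malicious = np.array([False, False, False, True, True])
print(evolve_opinions(x0, malicious))  # normal agents cluster together
```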
2. arXiv:2412.00746 [pdf, other] (https://arxiv.org/abs/2412.00746)
   Subjects: Software Engineering (cs.SE)
   Title: BDefects4NN: A Backdoor Defect Database for Controlled Localization Studies in Neural Networks
   Authors: Yisong Xiao, Aishan Liu, Xinwei Zhang, Tianyuan Zhang, Tianlin Li, Siyuan Liang, Xianglong Liu, Yang Liu, Dacheng Tao
   Abstract: Pre-trained large deep learning models now serve as the dominant component for downstream middleware users and have revolutionized the learning paradigm, replacing the traditional approach of training from scratch locally. To reduce development costs, developers often integrate third-party pre-trained deep neural networks (DNNs) into their intelligent software systems. However, utilizing untrusted DNNs presents significant security risks, as these models may contain intentional backdoor defects resulting from the black-box training process. Such backdoor defects can be activated by hidden triggers, allowing attackers to maliciously control the model and compromise the overall reliability of the intelligent software. To ensure the safe adoption of DNNs in critical software systems, it is crucial to establish a backdoor defect database for localization studies. This paper addresses this research gap by introducing BDefects4NN, the first backdoor defect database, which provides labeled backdoor-defected DNNs at the neuron granularity and enables controlled localization studies of defect root causes. In BDefects4NN, we define three defect injection rules and employ four representative backdoor attacks across four popular network architectures and three widely adopted datasets, yielding a comprehensive database of 1,654 backdoor-defected DNNs with four defect quantities and varying infected neurons. Based on BDefects4NN, we conduct extensive experiments evaluating six fault localization criteria and two defect repair techniques, which show limited effectiveness for backdoor defects. Additionally, we investigate backdoor-defected models in practical scenarios, specifically in lane detection for autonomous driving and in large language models (LLMs), revealing potential threats and highlighting current limitations in precise defect localization.
   Submitted 1 December, 2024; originally announced December 2024.
   Comments: 11 pages, accepted by ICSE 2025.
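BDefects4NN itself is a curated database, but the kind of check it enables is easy to picture: stamp a candidate trigger onto clean inputs and measure how often predictions flip to an attacker-chosen label. A generic sketch follows; the `model` interface, patch placement, and function name are placeholders, not the BDefects4NN API:

```python
import numpy as np

def attack_success_rate(model, images, target_label, patch_value=1.0):
    """Fraction of inputs whose prediction flips to `target_label`
    once a small trigger patch is stamped on (toy backdoor probe).
    `model` is any callable mapping a batch (N, H, W) to logits (N, C)."""
    triggered = images.copy()
    triggered[:, -3:, -3:] = patch_value          # 3x3 patch in a corner
    clean_pred = model(images).argmax(axis=1)
    trig_pred = model(triggered).argmax(axis=1)
    flipped = (clean_pred != target_label) & (trig_pred == target_label)
    return float(flipped.mean())
```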
3. arXiv:2412.00266 [pdf, other] (https://arxiv.org/abs/2412.00266)
   Subjects: Networking and Internet Architecture (cs.NI)
   Title: Unlocking Diversity of Fast-Switched Optical Data Center Networks with Unified Routing
   Authors: Jialong Li, Federico De Marchi, Yiming Lei, Raj Joshi, Balakrishnan Chandrasekaran, Yiting Xia
   Abstract: Optical data center networks (DCNs) are emerging as a promising solution for cloud infrastructure in the post-Moore's Law era, particularly with the advent of "fast-switched" optical architectures capable of circuit reconfiguration at microsecond or even nanosecond scales. However, frequent reconfiguration of optical circuits introduces a unique challenge: in-flight packets risk loss during these transitions, and the lack of suitable routing solutions has hindered the deployment of many mature optical hardware designs. In this paper, we present Unified Routing for Optical networks (URO), a general routing framework designed to support fast-switched optical DCNs across various hardware architectures. URO combines theoretical modeling of this novel routing problem with a practical implementation on programmable switches, enabling precise, time-based packet transmission. Our prototype on Intel Tofino2 switches achieves a minimum circuit duration of 2 µs, ensuring end-to-end, loss-free application performance. Large-scale simulations using production DCN traffic validate URO's generality across different hardware configurations, demonstrating its effectiveness and efficient use of system resources.
   Submitted 29 November, 2024; originally announced December 2024.
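"Precise, time-based packet transmission" boils down to releasing a packet only when a circuit to its destination is up. A toy host-side version of that decision is sketched below; the schedule format and microsecond granularity are illustrative assumptions (URO itself implements this on programmable switches):

```python
def next_send_time(schedule, src, dst, now, period):
    """Return the earliest time >= now at which a circuit from src to dst
    is up. schedule: list of (start, end, src, dst) intervals within one
    reconfiguration period; all times in microseconds (toy model)."""
    t = now % period
    best = None
    for start, end, s, d in schedule:
        if (s, d) != (src, dst):
            continue
        if start <= t < end:
            return now                    # circuit is up: send immediately
        wait = (start - t) % period       # wait for the slot's next appearance
        best = wait if best is None else min(best, wait)
    if best is None:
        raise ValueError("no circuit ever connects this pair")
    return now + best                     # buffer until then

sched = [(0, 2, "A", "B"), (2, 4, "A", "C")]
print(next_send_time(sched, "A", "C", now=1, period=4))  # -> 2
```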
4. arXiv:2411.19290 [pdf, other] (https://arxiv.org/abs/2411.19290)
   Subjects: Computer Vision and Pattern Recognition (cs.CV)
   Title: SADG: Segment Any Dynamic Gaussian Without Object Trackers
   Authors: Yun-Jin Li, Mariia Gladkova, Yan Xia, Daniel Cremers
   Abstract: Understanding dynamic 3D scenes is fundamental for various applications, including extended reality (XR) and autonomous driving. Effectively integrating semantic information into 3D reconstruction enables holistic representations that open opportunities for immersive and interactive applications. We introduce SADG, Segment Any Dynamic Gaussian Without Object Trackers, a novel approach that combines a dynamic Gaussian Splatting representation with semantic information without relying on object IDs. In contrast to existing works, we do not rely on supervision based on object identities to enable consistent segmentation of dynamic 3D objects. To this end, we propose to learn semantically aware features by leveraging masks generated by the Segment Anything Model (SAM) together with a novel contrastive learning objective based on hard pixel mining. The learned Gaussian features can be clustered effectively without further post-processing. This enables fast computation for downstream object-level editing, such as object removal, composition, and style transfer, by manipulating the Gaussians in the scene. We further extend several dynamic novel-view datasets with segmentation benchmarks to enable testing of learned feature fields from unseen viewpoints. We evaluate SADG on the proposed benchmarks and demonstrate the superior performance of our approach in segmenting objects within dynamic scenes, along with its effectiveness for downstream editing tasks.
   Submitted 28 November, 2024; originally announced November 2024.
   Comments: Project page: https://yunjinli.github.io/project-sadg
5. arXiv:2411.19000 [pdf] (https://arxiv.org/abs/2411.19000)
   Subjects: Human-Computer Interaction (cs.HC); Artificial Intelligence (cs.AI); Systems and Control (eess.SY)
   Title: A Unified Platform for At-Home Post-Stroke Rehabilitation Enabled by Wearable Technologies and Artificial Intelligence
   Authors: Chenyu Tang, Ruizhi Zhang, Shuo Gao, Zihe Zhao, Zibo Zhang, Jiaqi Wang, Cong Li, Junliang Chen, Yanning Dai, Shengbo Wang, Ruoyu Juan, Qiaoying Li, Ruimou Xie, Xuhang Chen, Xinkai Zhou, Yunjia Xia, Jianan Chen, Fanghao Lu, Xin Li, Ninglli Wang, Peter Smielewski, Yu Pan, Hubin Zhao, Luigi G. Occhipinti
   Abstract: At-home rehabilitation for post-stroke patients presents significant challenges, as continuous, personalized care is often limited outside clinical settings. Additionally, the absence of comprehensive solutions addressing diverse rehabilitation needs in home environments complicates recovery efforts. Here, we introduce a smart home platform that integrates wearable sensors, ambient monitoring, and large language model (LLM)-powered assistance to provide seamless health monitoring and intelligent support. The system leverages machine-learning-enabled plantar pressure arrays for motor recovery assessment (94% classification accuracy), a wearable eye-tracking module for cognitive evaluation, and ambient sensors for precise smart home control (100% operational success, <1 s latency). Additionally, the LLM-powered agent, Auto-Care, offers real-time interventions, such as health reminders and environmental adjustments, enhancing user satisfaction by 29%. This work establishes a fully integrated platform for long-term, personalized rehabilitation, offering new possibilities for managing chronic conditions and supporting aging populations.
   Submitted 28 November, 2024; originally announced November 2024.
   Comments: 5 figures, 35 references.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 figures, 35 references</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.18319">arXiv:2411.18319</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.18319">pdf</a>, <a href="https://arxiv.org/format/2411.18319">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> Lighthouse: An Open Research Framework for Optical Data Center Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lei%2C+Y">Yiming Lei</a>, <a href="/search/cs?searchtype=author&amp;query=De+Marchi%2C+F">Federico De Marchi</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jialong Li</a>, <a href="/search/cs?searchtype=author&amp;query=Joshi%2C+R">Raj Joshi</a>, <a href="/search/cs?searchtype=author&amp;query=Chandrasekaran%2C+B">Balakrishnan Chandrasekaran</a>, <a href="/search/cs?searchtype=author&amp;query=Xia%2C+Y">Yiting Xia</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.18319v1-abstract-short" style="display: inline;"> Optical data center networks (DCNs) are emerging as a promising design for cloud infrastructure. However, existing optical DCN architectures operate as closed ecosystems, tying software solutions to specific optical hardware. We introduce Lighthouse, an open research framework that decouples software from hardware, allowing them to evolve independently. Central to Lighthouse is the time-flow table&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.18319v1-abstract-full').style.display = 'inline'; document.getElementById('2411.18319v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.18319v1-abstract-full" style="display: none;"> Optical data center networks (DCNs) are emerging as a promising design for cloud infrastructure. However, existing optical DCN architectures operate as closed ecosystems, tying software solutions to specific optical hardware. We introduce Lighthouse, an open research framework that decouples software from hardware, allowing them to evolve independently. Central to Lighthouse is the time-flow table abstraction, serving as a common interface between optical hardware and software. We develop Lighthouse on programmable switches, achieving a minimum optical circuit duration of 2 渭s, the shortest duration realized by commodity devices to date. We demonstrate Lighthouse&#39;s generality by implementing six optical architectures on an optical testbed and conducted extensive benchmarks on a 108-ToR setup, highlighting system efficiency. Additionally, we present case studies that identify potential research topics enabled by Lighthouse. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.18319v1-abstract-full').style.display = 'none'; document.getElementById('2411.18319v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.17850">arXiv:2411.17850</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.17850">pdf</a>, <a href="https://arxiv.org/format/2411.17850">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Reliability of deep learning models for anatomical landmark detection: The role of inter-rater variability </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Salari%2C+S">Soorena Salari</a>, <a href="/search/cs?searchtype=author&amp;query=Rivaz%2C+H">Hassan Rivaz</a>, <a href="/search/cs?searchtype=author&amp;query=Xiao%2C+Y">Yiming Xiao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.17850v1-abstract-short" style="display: inline;"> Automated detection of anatomical landmarks plays a crucial role in many diagnostic and surgical applications. Progresses in deep learning (DL) methods have resulted in significant performance enhancement in tasks related to anatomical landmark detection. While current research focuses on accurately localizing these landmarks in medical scans, the importance of inter-rater annotation variability i&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.17850v1-abstract-full').style.display = 'inline'; document.getElementById('2411.17850v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.17850v1-abstract-full" style="display: none;"> Automated detection of anatomical landmarks plays a crucial role in many diagnostic and surgical applications. Progresses in deep learning (DL) methods have resulted in significant performance enhancement in tasks related to anatomical landmark detection. While current research focuses on accurately localizing these landmarks in medical scans, the importance of inter-rater annotation variability in building DL models is often overlooked. Understanding how inter-rater variability impacts the performance and reliability of the resulting DL algorithms, which are crucial for clinical deployment, can inform the improvement of training data construction and boost DL models&#39; outcomes. In this paper, we conducted a thorough study of different annotation-fusion strategies to preserve inter-rater variability in DL models for anatomical landmark detection, aiming to boost the performance and reliability of the resulting algorithms. 
7. arXiv:2411.17850 [pdf, other] (https://arxiv.org/abs/2411.17850)
   Subjects: Image and Video Processing (eess.IV); Computer Vision and Pattern Recognition (cs.CV)
   Title: Reliability of deep learning models for anatomical landmark detection: The role of inter-rater variability
   Authors: Soorena Salari, Hassan Rivaz, Yiming Xiao
   Abstract: Automated detection of anatomical landmarks plays a crucial role in many diagnostic and surgical applications. Progress in deep learning (DL) methods has brought significant performance gains in anatomical landmark detection tasks. While current research focuses on accurately localizing these landmarks in medical scans, the importance of inter-rater annotation variability in building DL models is often overlooked. Understanding how inter-rater variability affects the performance and reliability of the resulting DL algorithms, which are crucial for clinical deployment, can inform the construction of better training data and boost DL models' outcomes. In this paper, we conducted a thorough study of different annotation-fusion strategies for preserving inter-rater variability in DL models for anatomical landmark detection, aiming to boost the performance and reliability of the resulting algorithms. Additionally, we explored the characteristics and reliability of four metrics, including a novel Weighted Coordinate Variance metric, for quantifying landmark detection uncertainty and inter-rater variability. Our research highlights the crucial connection between inter-rater variability, DL model performance, and uncertainty, revealing how different approaches to multi-rater landmark annotation fusion influence these factors.
   Submitted 26 November, 2024; originally announced November 2024.
   Comments: Accepted to SPIE Medical Imaging 2025.
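The Weighted Coordinate Variance metric is the paper's own contribution, and its formula is not given in the abstract. Purely to fix intuition, here is a plain weighted variance of rater coordinates around their weighted mean, which captures the flavor of quantifying inter-rater spread (not the paper's definition):

```python
import numpy as np

def coordinate_variance(annotations, weights=None):
    """Weighted spread of one landmark's annotations across raters.

    annotations : (n_raters, 3) array of x, y, z coordinates
    weights     : optional per-rater reliability weights
    """
    a = np.asarray(annotations, dtype=float)
    w = np.ones(len(a)) if weights is None else np.asarray(weights, dtype=float)
    w = w / w.sum()
    mean = (w[:, None] * a).sum(axis=0)           # weighted mean position
    var = (w[:, None] * (a - mean) ** 2).sum(axis=0)
    return float(var.sum())                       # total variance over x, y, z

raters = [[10.1, 20.3, 5.0], [10.4, 20.0, 5.2], [9.8, 20.5, 4.9]]
print(coordinate_variance(raters))
```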
8. arXiv:2411.17845 [pdf, other] (https://arxiv.org/abs/2411.17845)
   Subjects: Image and Video Processing (eess.IV); Computer Vision and Pattern Recognition (cs.CV)
   Title: CAMLD: Contrast-Agnostic Medical Landmark Detection with Consistency-Based Regularization
   Authors: Soorena Salari, Arash Harirpoush, Hassan Rivaz, Yiming Xiao
   Abstract: Anatomical landmark detection in medical images is essential for various clinical and research applications, including disease diagnosis and surgical planning. However, manual landmark annotation is time-consuming and requires significant expertise. Existing deep learning (DL) methods often require large amounts of well-annotated data, which are costly to acquire. In this paper, we introduce CAMLD, a novel self-supervised DL framework for anatomical landmark detection in unlabeled scans with varying contrasts, using only a single reference example. To achieve this, we employ an inter-subject landmark consistency loss together with an image registration loss, and introduce a 3D-convolution-based contrast augmentation strategy to promote model generalization to new contrasts. Additionally, we utilize an adaptive mixed loss function to schedule the contributions of the different sub-tasks for optimal outcomes. We demonstrate the proposed method on the intricate task of MRI-based 3D brain landmark detection. Through comprehensive experiments on four diverse clinical and public datasets, including both T1w and T2w MRI scans at different field strengths, we demonstrate that CAMLD outperforms state-of-the-art methods in terms of mean radial error (MRE) and success detection rate (SDR). Our framework provides a robust and accurate solution for anatomical landmark detection, reducing the need for extensively annotated datasets and generalizing well across imaging contrasts. Our code will be publicly available at https://github.com/HealthX-Lab/CAMLD.
   Submitted 26 November, 2024; originally announced November 2024.
   Comments: 14 pages, 6 figures, 3 tables.

9. arXiv:2411.16799 [pdf, other] (https://arxiv.org/abs/2411.16799)
   Subjects: Computer Vision and Pattern Recognition (cs.CV)
   Title: One is Plenty: A Polymorphic Feature Interpreter for Immutable Heterogeneous Collaborative Perception
   Authors: Yuchen Xia, Quan Yuan, Guiyang Luo, Xiaoyuan Fu, Yang Li, Xuanhan Zhu, Tianyou Luo, Siheng Chen, Jinglin Li
   Abstract: Collaborative perception in autonomous driving significantly enhances the perception capabilities of individual agents. Immutable heterogeneity, where agents have different and fixed perception networks, presents a major challenge: the semantic gap in the agents' exchanged intermediate features must be bridged without modifying their perception networks. Most existing methods bridge the semantic gap through interpreters. However, they either require training a new interpreter for each new agent type, limiting extensibility, or rely on a two-stage interpretation via an intermediate standardized semantic space, causing cumulative semantic loss. To achieve both extensibility in immutable heterogeneous scenarios and low-loss feature interpretation, we propose PolyInter, a polymorphic feature interpreter. It contains an extension point through which emerging new agents can seamlessly integrate by overriding only their specific prompts, which are learnable parameters intended to guide the interpretation, while reusing PolyInter's remaining parameters. By leveraging polymorphism, our design ensures that a single interpreter suffices to accommodate diverse agents and interpret their features into the ego agent's semantic space. Experiments on the OPV2V dataset demonstrate that PolyInter improves collaborative perception precision by up to 11.1% compared to SOTA interpreters, while comparable results can be achieved by training only 1.4% of PolyInter's parameters when adapting to new agents.
   Submitted 25 November, 2024; originally announced November 2024.
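The "one interpreter plus per-agent prompts" structure can be pictured with a few lines of linear algebra: one shared projection serves every agent, and a new agent contributes only a small prompt vector. Dimensions, the concatenation scheme, and all names below are illustrative assumptions, not PolyInter's architecture:

```python
import numpy as np

rng = np.random.default_rng(0)

class ToyPolymorphicInterpreter:
    """One shared projection interprets any agent's features into the
    ego semantic space, conditioned on an agent-specific prompt vector.
    Registering a new agent adds only its prompt (the trainable part)."""

    def __init__(self, feat_dim, prompt_dim, out_dim):
        self.prompt_dim = prompt_dim
        self.shared = rng.normal(size=(feat_dim + prompt_dim, out_dim))
        self.prompts = {}                 # agent type -> prompt vector

    def register_agent(self, agent_type):
        # Extension point: a new agent overrides only its own prompt.
        self.prompts[agent_type] = rng.normal(size=self.prompt_dim)

    def interpret(self, agent_type, features):
        z = np.concatenate([features, self.prompts[agent_type]])
        return z @ self.shared

interp = ToyPolymorphicInterpreter(feat_dim=8, prompt_dim=4, out_dim=8)
interp.register_agent("lidar_net_v1")
print(interp.interpret("lidar_net_v1", rng.normal(size=8)).shape)  # (8,)
```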
10. arXiv:2411.15232 [pdf, other] (https://arxiv.org/abs/2411.15232)
    Subjects: Computer Vision and Pattern Recognition (cs.CV); Computation and Language (cs.CL)
    Title: BiomedCoOp: Learning to Prompt for Biomedical Vision-Language Models
    Authors: Taha Koleilat, Hojat Asgariandehkordi, Hassan Rivaz, Yiming Xiao
    Abstract: Recent advancements in vision-language models (VLMs), such as CLIP, have demonstrated substantial success in self-supervised representation learning for vision tasks. However, effectively adapting VLMs to downstream applications remains challenging, as their accuracy often depends on time-intensive and expertise-demanding prompt engineering, while full model fine-tuning is costly. This is particularly true for biomedical images, which, unlike natural images, typically suffer from limited annotated datasets, unintuitive image contrasts, and nuanced visual features. Recent prompt learning techniques, such as Context Optimization (CoOp), intend to tackle these issues but still fall short in generalizability. Meanwhile, explorations of prompt learning for biomedical image analysis remain highly limited. In this work, we propose BiomedCoOp, a novel prompt learning framework that enables efficient adaptation of BiomedCLIP for accurate and highly generalizable few-shot biomedical image classification. Our approach achieves effective prompt context learning by leveraging semantic consistency with average prompt ensembles from large language models (LLMs) and knowledge distillation with a statistics-based prompt selection strategy. We conducted comprehensive validation of our proposed framework on 11 medical datasets across 9 modalities and 10 organs against existing state-of-the-art methods, demonstrating significant improvements in both accuracy and generalizability. The code will be publicly available at https://github.com/HealthX-Lab/BiomedCoOp.
    Submitted 21 November, 2024; originally announced November 2024.
    Comments: 18 pages, 5 figures, 10 tables.
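"Average prompt ensembles" in CLIP-style models usually means embedding several natural-language prompts per class and averaging them into one prototype. A sketch of that generic mechanism follows; `encode_text`, the fake encoder, and the cosine classifier are placeholders rather than BiomedCoOp's pipeline:

```python
import numpy as np

def class_prototypes(encode_text, prompt_sets):
    """Average-prompt-ensemble prototypes: embed several prompts per
    class (e.g., LLM-generated) and average the normalized embeddings."""
    protos = {}
    for cls, prompts in prompt_sets.items():
        embs = np.stack([encode_text(p) for p in prompts])
        embs /= np.linalg.norm(embs, axis=1, keepdims=True)
        protos[cls] = embs.mean(axis=0)
    return protos

def classify(image_emb, protos):
    """Assign the class whose prototype is most cosine-similar."""
    image_emb = image_emb / np.linalg.norm(image_emb)
    return max(protos, key=lambda c: float(image_emb @ protos[c]))

def fake_encode(text):                    # stand-in for a text encoder
    seed = sum(ord(c) for c in text)
    return np.random.default_rng(seed).normal(size=16)

protos = class_prototypes(fake_encode, {
    "glioma": ["an MRI of a glioma", "a brain scan showing a glioma"],
    "normal": ["an MRI of a healthy brain", "a normal brain scan"],
})
print(classify(fake_encode("an MRI of a glioma"), protos))
```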
However, empirical evidence has shown that the $\ell_q$-norm penalty (where $0 &lt; q &lt; 1$) often provides better regression compared to the $\ell_1$-norm penalty, demonstrating enhanced robustness in various scenarios. In this paper, we explore a generalized elastic net model that employs an $\ell_r$-norm (where $r \geq 1$) in the loss function to accommodate various types of noise, and an $\ell_q$-norm (where $0 &lt; q &lt; 1$) to replace the $\ell_1$-norm in the elastic net penalty. Theoretically, we establish computable lower bounds for the nonzero entries of the generalized first-order stationary points of the proposed generalized elastic net model. For implementation, we develop two efficient algorithms based on the locally Lipschitz continuous $\varepsilon$-approximation to the $\ell_q$-norm. The first algorithm employs an alternating direction method of multipliers (ADMM), while the second utilizes a proximal majorization-minimization method (PMM), where the subproblems are addressed using the semismooth Newton method (SSN). We also perform extensive numerical experiments with both simulated and real data, showing that both algorithms demonstrate superior performance. Notably, PMM-SSN is more efficient than ADMM, even though the latter provides a simpler implementation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14875v1-abstract-full').style.display = 'none'; document.getElementById('2411.14875v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.14479">arXiv:2411.14479</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.14479">pdf</a>, <a href="https://arxiv.org/format/2411.14479">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> GRL-Prompt: Towards Knowledge Graph based Prompt Optimization via Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Y">Yuze Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+T">Tingjie Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+T">Tiehua Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Xia%2C+Y">Youhua Xia</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+J">Jinze Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Shen%2C+Z">Zhishu Shen</a>, <a href="/search/cs?searchtype=author&amp;query=Jin%2C+J">Jiong Jin</a>, <a href="/search/cs?searchtype=author&amp;query=Yu%2C+F+R">Fei Richard Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.14479v1-abstract-short" style="display: inline;"> Large language models (LLMs) have demonstrated impressive success in a wide range of natural language processing (NLP) tasks due to their extensive general knowledge of the world. 
Recent works discovered that the performance of LLMs is heavily dependent on the input prompt. However, prompt engineering is usually done manually in a trial-and-error fashion, which can be labor-intensive and challengi&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14479v1-abstract-full').style.display = 'inline'; document.getElementById('2411.14479v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.14479v1-abstract-full" style="display: none;"> Large language models (LLMs) have demonstrated impressive success in a wide range of natural language processing (NLP) tasks due to their extensive general knowledge of the world. Recent works discovered that the performance of LLMs is heavily dependent on the input prompt. However, prompt engineering is usually done manually in a trial-and-error fashion, which can be labor-intensive and makes it challenging to find the optimal prompts. To address these problems and unleash the utmost potential of LLMs, we propose a novel LLM-agnostic framework for prompt optimization, namely GRL-Prompt, which aims to automatically construct optimal prompts via reinforcement learning (RL) in an end-to-end manner. To provide structured action/state representation for optimizing prompts, we construct a knowledge graph (KG) that better encodes the correlation between the user query and candidate in-context examples. Furthermore, a policy network is formulated to generate the optimal action by selecting a set of in-context examples in a rewardable order to construct the prompt. Additionally, embedding-based reward shaping is utilized to stabilize the RL training process. The experimental results show that GRL-Prompt outperforms recent state-of-the-art methods, achieving an average increase of 0.10 in ROUGE-1, 0.07 in ROUGE-2, 0.07 in ROUGE-L, and 0.05 in BLEU. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14479v1-abstract-full').style.display = 'none'; document.getElementById('2411.14479v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
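<p>As a rough sketch of the prompt-construction step GRL-Prompt describes, the snippet below selects and orders in-context examples for a query before appending the query itself. A plain cosine-similarity heuristic stands in for the learned policy network, and the names (build_prompt, candidates) are hypothetical illustrations, not from the paper.</p> <pre><code class="language-python">
# Hedged sketch: assemble a prompt by scoring and ordering in-context
# examples. A cosine-similarity score stands in for GRL-Prompt's learned
# policy network; all names here are hypothetical.
import numpy as np

def cosine(u, v):
    return float(np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v)))

def build_prompt(query_text, query_emb, candidates, k=3):
    """candidates: list of (example_text, example_embedding) pairs."""
    ranked = sorted(candidates, key=lambda c: cosine(query_emb, c[1]), reverse=True)
    selected = [text for text, _ in ranked[:k]]
    return "\n\n".join(selected + [query_text])
</code></pre> <p>In the actual framework, the ordering would come from the policy network trained with embedding-based reward shaping over the knowledge graph, rather than from a fixed similarity score.</p>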
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.13017">arXiv:2411.13017</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.13017">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Breaking the Cycle of Recurring Failures: Applying Generative AI to Root Cause Analysis in Legacy Banking Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Jin%2C+S">Siyuan Jin</a>, <a href="/search/cs?searchtype=author&amp;query=Bei%2C+Z">Zhendong Bei</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+B">Bichao Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Xia%2C+Y">Yong Xia</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.13017v1-abstract-short" style="display: inline;"> Traditional banks face significant challenges in digital transformation, primarily due to legacy system constraints and fragmented ownership. Recent incidents show that such fragmentation often results in superficial incident resolutions, leaving root causes unaddressed and causing recurring failures. We introduce a novel approach to post-incident analysis, integrating knowledge-based GenAI agents&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13017v1-abstract-full').style.display = 'inline'; document.getElementById('2411.13017v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.13017v1-abstract-full" style="display: none;"> Traditional banks face significant challenges in digital transformation, primarily due to legacy system constraints and fragmented ownership. Recent incidents show that such fragmentation often results in superficial incident resolutions, leaving root causes unaddressed and causing recurring failures. We introduce a novel approach to post-incident analysis, integrating knowledge-based GenAI agents with the &#34;Five Whys&#34; technique to examine problem descriptions and change request data. This method uncovered that approximately 70% of the incidents previously attributed to management or vendor failures were due to underlying internal code issues. We present a case study to show the impact of our method. By scanning over 5,000 projects, we identified over 400 files with a similar root cause. Overall, we leverage the knowledge-based agents to automate and elevate root cause analysis, transforming it into a more proactive process. These agents can be applied across other phases of the software development lifecycle, further improving development processes. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13017v1-abstract-full').style.display = 'none'; document.getElementById('2411.13017v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
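<p>The &#34;Five Whys&#34; loop that the root-cause-analysis paper above pairs with GenAI agents iterates one question repeatedly, each time on the previous answer. The sketch below shows only that control flow; ask_llm is a hypothetical stand-in for whatever agent backend is used, and nothing here reproduces the paper's system.</p> <pre><code class="language-python">
# Minimal sketch of an LLM-driven "Five Whys" chain. ask_llm is a
# hypothetical callable (prompt string in, answer string out); the
# loop structure, not the backend, is what is being illustrated.
def five_whys(problem_description, ask_llm, depth=5):
    chain, current = [], problem_description
    for _ in range(depth):
        answer = ask_llm(f"Why did the following occur? {current}")
        chain.append(answer)
        current = answer  # each answer becomes the next "why" target
    return chain  # the final entry approximates the root cause
</code></pre>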
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.12195">arXiv:2411.12195</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.12195">pdf</a>, <a href="https://arxiv.org/format/2411.12195">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> A Survey of Medical Vision-and-Language Applications and Their Techniques </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Q">Qi Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+R">Ruoshan Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+S">Sinuo Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Phan%2C+V+M+H">Vu Minh Hieu Phan</a>, <a href="/search/cs?searchtype=author&amp;query=Hengel%2C+A+v+d">Anton van den Hengel</a>, <a href="/search/cs?searchtype=author&amp;query=Verjans%2C+J">Johan Verjans</a>, <a href="/search/cs?searchtype=author&amp;query=Liao%2C+Z">Zhibin Liao</a>, <a href="/search/cs?searchtype=author&amp;query=To%2C+M">Minh-Son To</a>, <a href="/search/cs?searchtype=author&amp;query=Xia%2C+Y">Yong Xia</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+J">Jian Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Xie%2C+Y">Yutong Xie</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+Q">Qi Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.12195v1-abstract-short" style="display: inline;"> Medical vision-and-language models (MVLMs) have attracted substantial interest due to their capability to offer a natural language interface for interpreting complex medical data. Their applications are versatile and have the potential to improve diagnostic accuracy and decision-making for individual patients while also contributing to enhanced public health monitoring, disease surveillance, and p&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12195v1-abstract-full').style.display = 'inline'; document.getElementById('2411.12195v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.12195v1-abstract-full" style="display: none;"> Medical vision-and-language models (MVLMs) have attracted substantial interest due to their capability to offer a natural language interface for interpreting complex medical data. Their applications are versatile and have the potential to improve diagnostic accuracy and decision-making for individual patients while also contributing to enhanced public health monitoring, disease surveillance, and policy-making through more efficient analysis of large data sets. MVLMs integrate natural language processing with medical images to enable a more comprehensive and contextual understanding of medical images alongside their corresponding textual information. Unlike general vision-and-language models trained on diverse, non-specialized datasets, MVLMs are purpose-built for the medical domain, automatically extracting and interpreting critical information from medical images and textual reports to support clinical decision-making. 
Popular clinical applications of MVLMs include automated medical report generation, medical visual question answering, medical multimodal segmentation, diagnosis and prognosis, and medical image-text retrieval. Here, we provide a comprehensive overview of MVLMs and the various medical tasks to which they have been applied. We conduct a detailed analysis of various vision-and-language model architectures, focusing on their distinct strategies for cross-modal integration/exploitation of medical visual and textual features. We also examine the datasets used for these tasks and compare the performance of different models based on standardized evaluation metrics. Furthermore, we highlight potential challenges and summarize future research trends and directions. The full collection of papers and codes is available at: https://github.com/YtongXie/Medical-Vision-and-Language-Tasks-and-Methodologies-A-Survey. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12195v1-abstract-full').style.display = 'none'; document.getElementById('2411.12195v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.12151">arXiv:2411.12151</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.12151">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Self-Supervised Learning in Deep Networks: A Pathway to Robust Few-Shot Classification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Xiao%2C+Y">Yuyang Xiao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.12151v1-abstract-short" style="display: inline;"> This study aims to optimize the few-shot image classification task and improve the model&#39;s feature extraction and classification performance by combining self-supervised learning with the deep network model ResNet-101. During the training process, we first pre-train the model with self-supervision to enable it to learn common feature expressions on a large amount of unlabeled data; then fine-tune&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12151v1-abstract-full').style.display = 'inline'; document.getElementById('2411.12151v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.12151v1-abstract-full" style="display: none;"> This study aims to optimize the few-shot image classification task and improve the model&#39;s feature extraction and classification performance by combining self-supervised learning with the deep network model ResNet-101. 
During the training process, we first pre-train the model with self-supervision to enable it to learn common feature expressions on a large amount of unlabeled data; then fine-tune it on the few-shot dataset Mini-ImageNet to improve the model&#39;s accuracy and generalization ability under limited data. The experimental results show that, compared with traditional convolutional neural networks such as ResNet-50 and DenseNet, among other models, our method achieved excellent performance of about 95.12% in both classification accuracy (ACC) and F1 score, verifying the effectiveness of self-supervised learning in few-shot classification. This method provides an efficient and reliable solution for the field of few-shot image classification. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12151v1-abstract-full').style.display = 'none'; document.getElementById('2411.12151v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.11370">arXiv:2411.11370</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.11370">pdf</a>, <a href="https://arxiv.org/format/2411.11370">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> TL-CLIP: A Power-specific Multimodal Pre-trained Visual Foundation Model for Transmission Line Defect Recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+K">Ke Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+Z">Zhaoye Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+Y">Yurong Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+J">Jiacun Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jiyuan Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Xiao%2C+Y">Yangjie Xiao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.11370v1-abstract-short" style="display: inline;"> Transmission line defect recognition models have traditionally used general pre-trained weights as the initial basis for their training. These models often suffer from weak generalization capability due to the lack of domain knowledge in the pre-training dataset. To address this issue, we propose a two-stage transmission-line-oriented contrastive language-image pre-training (TL-CLIP) framework, which l&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11370v1-abstract-full').style.display = 'inline'; document.getElementById('2411.11370v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.11370v1-abstract-full" style="display: none;"> Transmission line defect recognition models have traditionally used general pre-trained weights as the initial basis for their training. 
These models often suffer from weak generalization capability due to the lack of domain knowledge in the pre-training dataset. To address this issue, we propose a two-stage transmission-line-oriented contrastive language-image pre-training (TL-CLIP) framework, which lays a more effective foundation for transmission line defect recognition. The pre-training process employs a novel power-specific multimodal algorithm assisted by two power-specific pre-training tasks to better model the power-related semantic knowledge contained in the inspection data. To fine-tune the pre-trained model, we develop a transfer learning strategy, namely fine-tuning with pre-training objective (FTP), to alleviate the overfitting problem caused by limited inspection data. Experimental results demonstrate that the proposed method significantly improves the performance of transmission line defect recognition in both classification and detection tasks, indicating clear advantages over traditional pre-trained models in the context of transmission line inspection. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11370v1-abstract-full').style.display = 'none'; document.getElementById('2411.11370v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.10136">arXiv:2411.10136</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.10136">pdf</a>, <a href="https://arxiv.org/format/2411.10136">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> CoSAM: Self-Correcting SAM for Domain Generalization in 2D Medical Image Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Fu%2C+Y">Yihang Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Z">Ziyang Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Ye%2C+Y">Yiwen Ye</a>, <a href="/search/cs?searchtype=author&amp;query=Lei%2C+X">Xingliang Lei</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Z">Zhisong Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Xia%2C+Y">Yong Xia</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.10136v1-abstract-short" style="display: inline;"> Medical images often exhibit distribution shifts due to variations in imaging protocols and scanners across different medical centers. Domain Generalization (DG) methods aim to train models on source domains that can generalize to unseen target domains. 
Recently, the segment anything model (SAM) has demonstrated strong generalization capabilities due to its prompt-based design, and has gained sign&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10136v1-abstract-full').style.display = 'inline'; document.getElementById('2411.10136v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.10136v1-abstract-full" style="display: none;"> Medical images often exhibit distribution shifts due to variations in imaging protocols and scanners across different medical centers. Domain Generalization (DG) methods aim to train models on source domains that can generalize to unseen target domains. Recently, the segment anything model (SAM) has demonstrated strong generalization capabilities due to its prompt-based design, and has gained significant attention in image segmentation tasks. Existing SAM-based approaches attempt to address the need for manual prompts by introducing prompt generators that automatically generate these prompts. However, we argue that auto-generated prompts may not be sufficiently accurate under distribution shifts, potentially leading to incorrect predictions that still require manual verification and correction by clinicians. To address this challenge, we propose a method for 2D medical image segmentation called Self-Correcting SAM (CoSAM). Our approach begins by generating coarse masks using SAM in a prompt-free manner, providing prior prompts for the subsequent stages, and eliminating the need for prompt generators. To automatically refine these coarse masks, we introduce a generalized error decoder that simulates the correction process typically performed by clinicians. Furthermore, we generate diverse prompts as feedback based on the corrected masks, which are used to iteratively refine the predictions within a self-correcting loop, enhancing the generalization performance of our model. Extensive experiments on two medical image segmentation benchmarks across multiple scenarios demonstrate the superiority of CoSAM over state-of-the-art SAM-based methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10136v1-abstract-full').style.display = 'none'; document.getElementById('2411.10136v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
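<p>The self-correcting loop CoSAM describes can be pictured as follows: a prompt-free coarse mask seeds an error-correction and prompt-feedback cycle. In the sketch below all three helper callables are hypothetical stand-ins (the paper uses SAM plus a learned generalized error decoder), so this shows the control flow only, not the authors' implementation.</p> <pre><code class="language-python">
# Hedged sketch of a CoSAM-style self-correcting segmentation loop.
# segment, predict_error and mask_to_prompts are hypothetical stand-ins
# for SAM, the generalized error decoder, and prompt extraction.
import numpy as np

def self_correcting_segment(image, segment, predict_error, mask_to_prompts, steps=3):
    mask = segment(image, prompts=None)           # coarse, prompt-free pass
    for _ in range(steps):
        error = predict_error(image, mask)        # simulated clinician correction
        corrected = np.clip(mask - error, 0.0, 1.0)
        prompts = mask_to_prompts(corrected)      # feedback prompts from the fix
        mask = segment(image, prompts=prompts)    # refined prediction
    return mask
</code></pre>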
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.10032">arXiv:2411.10032</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.10032">pdf</a>, <a href="https://arxiv.org/format/2411.10032">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> VMID: A Multimodal Fusion LLM Framework for Detecting and Identifying Misinformation of Short Videos </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhong%2C+W">Weihao Zhong</a>, <a href="/search/cs?searchtype=author&amp;query=Xiao%2C+Y">Yinhao Xiao</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+M">Minghui Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+X">Xiuzhen Cheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.10032v1-abstract-short" style="display: inline;"> Short video platforms have become important channels for news dissemination, offering a highly engaging and immediate way for users to access current events and share information. However, these platforms have also emerged as significant conduits for the rapid spread of misinformation, as fake news and rumors can leverage the visual appeal and wide reach of short videos to circulate extensively am&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10032v1-abstract-full').style.display = 'inline'; document.getElementById('2411.10032v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.10032v1-abstract-full" style="display: none;"> Short video platforms have become important channels for news dissemination, offering a highly engaging and immediate way for users to access current events and share information. However, these platforms have also emerged as significant conduits for the rapid spread of misinformation, as fake news and rumors can leverage the visual appeal and wide reach of short videos to circulate extensively among audiences. Existing fake news detection methods mainly rely on single-modal information, such as text or images, or apply only basic fusion techniques, limiting their ability to handle the complex, multi-layered information inherent in short videos. To address these limitations, this paper presents a novel fake news detection method based on multimodal information, designed to identify misinformation through a multi-level analysis of video content. This approach effectively utilizes different modal representations to generate a unified textual description, which is then fed into a large language model for comprehensive evaluation. The proposed framework successfully integrates multimodal features within videos, significantly enhancing the accuracy and reliability of fake news detection. 
Experimental results demonstrate that the proposed approach outperforms existing models in terms of accuracy, robustness, and utilization of multimodal information, achieving an accuracy of 90.93%, which is significantly higher than the best baseline model (SV-FEND) at 81.05%. Furthermore, case studies provide additional evidence of the effectiveness of the approach in accurately distinguishing between fake news, debunking content, and real incidents, highlighting its reliability and robustness in real-world applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10032v1-abstract-full').style.display = 'none'; document.getElementById('2411.10032v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">arXiv admin note: text overlap with arXiv:2211.10973 by other authors</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.10027">arXiv:2411.10027</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.10027">pdf</a>, <a href="https://arxiv.org/format/2411.10027">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> XLSR-Mamba: A Dual-Column Bidirectional State Space Model for Spoofing Attack Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Xiao%2C+Y">Yang Xiao</a>, <a href="/search/cs?searchtype=author&amp;query=Das%2C+R+K">Rohan Kumar Das</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.10027v1-abstract-short" style="display: inline;"> Transformers and their variants have achieved great success in speech processing. However, their multi-head self-attention mechanism is computationally expensive. Therefore, a novel selective state space model, Mamba, has been proposed as an alternative. Building on its success in automatic speech recognition, we apply Mamba for spoofing attack detection. Mamba is well-suited for this task as it&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10027v1-abstract-full').style.display = 'inline'; document.getElementById('2411.10027v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.10027v1-abstract-full" style="display: none;"> Transformers and their variants have achieved great success in speech processing. However, their multi-head self-attention mechanism is computationally expensive. Therefore, a novel selective state space model, Mamba, has been proposed as an alternative. Building on its success in automatic speech recognition, we apply Mamba for spoofing attack detection. 
Mamba is well-suited for this task as it can capture the artifacts in spoofed speech signals by handling long sequences. However, Mamba&#39;s performance may suffer when it is trained with limited labeled data. To mitigate this, we propose combining a new dual-column Mamba architecture with self-supervised learning, using the pre-trained wav2vec 2.0 model. The experiments show that our proposed approach achieves competitive results and faster inference on the ASVspoof 2021 LA and DF datasets, and on the more challenging In-the-Wild dataset, it emerges as the strongest candidate for spoofing attack detection. The code will be publicly released in due course. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10027v1-abstract-full').style.display = 'none'; document.getElementById('2411.10027v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.09593">arXiv:2411.09593</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.09593">pdf</a>, <a href="https://arxiv.org/format/2411.09593">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> SMILE-UHURA Challenge -- Small Vessel Segmentation at Mesoscopic Scale from Ultra-High Resolution 7T Magnetic Resonance Angiograms </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chatterjee%2C+S">Soumick Chatterjee</a>, <a href="/search/cs?searchtype=author&amp;query=Mattern%2C+H">Hendrik Mattern</a>, <a href="/search/cs?searchtype=author&amp;query=D%C3%B6rner%2C+M">Marc Dörner</a>, <a href="/search/cs?searchtype=author&amp;query=Sciarra%2C+A">Alessandro Sciarra</a>, <a href="/search/cs?searchtype=author&amp;query=Dubost%2C+F">Florian Dubost</a>, <a href="/search/cs?searchtype=author&amp;query=Schnurre%2C+H">Hannes Schnurre</a>, <a href="/search/cs?searchtype=author&amp;query=Khatun%2C+R">Rupali Khatun</a>, <a href="/search/cs?searchtype=author&amp;query=Yu%2C+C">Chun-Chih Yu</a>, <a href="/search/cs?searchtype=author&amp;query=Hsieh%2C+T">Tsung-Lin Hsieh</a>, <a href="/search/cs?searchtype=author&amp;query=Tsai%2C+Y">Yi-Shan Tsai</a>, <a href="/search/cs?searchtype=author&amp;query=Fang%2C+Y">Yi-Zeng Fang</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+Y">Yung-Ching Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+J">Juinn-Dar Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+M">Marshall Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+S">Siyu Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Ribeiro%2C+F+L">Fernanda L. 
Ribeiro</a>, <a href="/search/cs?searchtype=author&amp;query=Bollmann%2C+S">Saskia Bollmann</a>, <a href="/search/cs?searchtype=author&amp;query=Chintalapati%2C+K+V">Karthikesh Varma Chintalapati</a>, <a href="/search/cs?searchtype=author&amp;query=Radhakrishna%2C+C+M">Chethan Mysuru Radhakrishna</a>, <a href="/search/cs?searchtype=author&amp;query=Kumara%2C+S+C+H+R">Sri Chandana Hudukula Ram Kumara</a>, <a href="/search/cs?searchtype=author&amp;query=Sutrave%2C+R">Raviteja Sutrave</a>, <a href="/search/cs?searchtype=author&amp;query=Qayyum%2C+A">Abdul Qayyum</a>, <a href="/search/cs?searchtype=author&amp;query=Mazher%2C+M">Moona Mazher</a>, <a href="/search/cs?searchtype=author&amp;query=Razzak%2C+I">Imran Razzak</a>, <a href="/search/cs?searchtype=author&amp;query=Rodero%2C+C">Cristobal Rodero</a> , et al. (23 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.09593v1-abstract-short" style="display: inline;"> The human brain receives nutrients and oxygen through an intricate network of blood vessels. Pathology affecting small vessels, at the mesoscopic scale, represents a critical vulnerability within the cerebral blood supply and can lead to severe conditions, such as Cerebral Small Vessel Diseases. The advent of 7 Tesla MRI systems has enabled the acquisition of higher spatial resolution images, maki&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09593v1-abstract-full').style.display = 'inline'; document.getElementById('2411.09593v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.09593v1-abstract-full" style="display: none;"> The human brain receives nutrients and oxygen through an intricate network of blood vessels. Pathology affecting small vessels, at the mesoscopic scale, represents a critical vulnerability within the cerebral blood supply and can lead to severe conditions, such as Cerebral Small Vessel Diseases. The advent of 7 Tesla MRI systems has enabled the acquisition of higher spatial resolution images, making it possible to visualise such vessels in the brain. However, the lack of publicly available annotated datasets has impeded the development of robust, machine learning-driven segmentation algorithms. To address this, the SMILE-UHURA challenge was organised. This challenge, held in conjunction with ISBI 2023 in Cartagena de Indias, Colombia, aimed to provide a platform for researchers working on related topics. The SMILE-UHURA challenge addresses the gap in publicly available annotated datasets by providing an annotated dataset of Time-of-Flight angiography acquired with 7T MRI. This dataset was created through a combination of automated pre-segmentation and extensive manual refinement. In this manuscript, sixteen submitted methods and two baseline methods are compared both quantitatively and qualitatively on two different datasets: held-out test MRAs from the same dataset as the training data (with labels kept secret) and a separate 7T ToF MRA dataset where both input volumes and labels are kept secret. The results demonstrate that most of the submitted deep learning methods, trained on the provided training dataset, achieved reliable segmentation performance. 
Dice scores reached up to 0.838 $\pm$ 0.066 and 0.716 $\pm$ 0.125 on the respective datasets, with an average performance of up to 0.804 $\pm$ 0.15. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09593v1-abstract-full').style.display = 'none'; document.getElementById('2411.09593v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.08900">arXiv:2411.08900</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.08900">pdf</a>, <a href="https://arxiv.org/format/2411.08900">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> </div> </div> <p class="title is-5 mathjax"> RNA-GPT: Multimodal Generative System for RNA Sequence Understanding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Xiao%2C+Y">Yijia Xiao</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+E">Edward Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Jin%2C+Y">Yiqiao Jin</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+W">Wei Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.08900v1-abstract-short" style="display: inline;"> RNAs are essential molecules that carry genetic information vital for life, with profound implications for drug development and biotechnology. Despite this importance, RNA research is often hindered by the vast literature available on the topic. To streamline this process, we introduce RNA-GPT, a multi-modal RNA chat model designed to simplify RNA discovery by leveraging extensive RNA literature.&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08900v1-abstract-full').style.display = 'inline'; document.getElementById('2411.08900v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.08900v1-abstract-full" style="display: none;"> RNAs are essential molecules that carry genetic information vital for life, with profound implications for drug development and biotechnology. Despite this importance, RNA research is often hindered by the vast literature available on the topic. To streamline this process, we introduce RNA-GPT, a multi-modal RNA chat model designed to simplify RNA discovery by leveraging extensive RNA literature. 
RNA-GPT integrates RNA sequence encoders with linear projection layers and state-of-the-art large language models (LLMs) for precise representation alignment, enabling it to process user-uploaded RNA sequences and deliver concise, accurate responses. Built on a scalable training pipeline, RNA-GPT utilizes RNA-QA, an automated system that gathers RNA annotations from RNACentral using a divide-and-conquer approach with GPT-4o and latent Dirichlet allocation (LDA) to efficiently handle large datasets and generate instruction-tuning samples. Our experiments indicate that RNA-GPT effectively addresses complex RNA queries, thereby facilitating RNA research. Additionally, we present RNA-QA, a dataset of 407,616 RNA samples for modality alignment and instruction tuning, further advancing the potential of RNA research tools. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08900v1-abstract-full').style.display = 'none'; document.getElementById('2411.08900v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Machine Learning for Structural Biology Workshop, NeurIPS 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.07207">arXiv:2411.07207</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.07207">pdf</a>, <a href="https://arxiv.org/format/2411.07207">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> General Geospatial Inference with a Population Dynamics Foundation Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Agarwal%2C+M">Mohit Agarwal</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+M">Mimi Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Kamath%2C+C">Chaitanya Kamath</a>, <a href="/search/cs?searchtype=author&amp;query=Muslim%2C+A">Arbaaz Muslim</a>, <a href="/search/cs?searchtype=author&amp;query=Sarker%2C+P">Prithul Sarker</a>, <a href="/search/cs?searchtype=author&amp;query=Paul%2C+J">Joydeep Paul</a>, <a href="/search/cs?searchtype=author&amp;query=Yee%2C+H">Hector Yee</a>, <a href="/search/cs?searchtype=author&amp;query=Sieniek%2C+M">Marcin Sieniek</a>, <a href="/search/cs?searchtype=author&amp;query=Jablonski%2C+K">Kim Jablonski</a>, <a href="/search/cs?searchtype=author&amp;query=Mayer%2C+Y">Yael Mayer</a>, <a href="/search/cs?searchtype=author&amp;query=Fork%2C+D">David Fork</a>, <a href="/search/cs?searchtype=author&amp;query=de+Guia%2C+S">Sheila de Guia</a>, <a href="/search/cs?searchtype=author&amp;query=McPike%2C+J">Jamie McPike</a>, <a href="/search/cs?searchtype=author&amp;query=Boulanger%2C+A">Adam Boulanger</a>, <a href="/search/cs?searchtype=author&amp;query=Shekel%2C+T">Tomer Shekel</a>, <a href="/search/cs?searchtype=author&amp;query=Schottlander%2C+D">David Schottlander</a>, <a 
href="/search/cs?searchtype=author&amp;query=Xiao%2C+Y">Yao Xiao</a>, <a href="/search/cs?searchtype=author&amp;query=Manukonda%2C+M+C">Manjit Chakravarthy Manukonda</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Y">Yun Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Bulut%2C+N">Neslihan Bulut</a>, <a href="/search/cs?searchtype=author&amp;query=Abu-el-haija%2C+S">Sami Abu-el-haija</a>, <a href="/search/cs?searchtype=author&amp;query=Eigenwillig%2C+A">Arno Eigenwillig</a>, <a href="/search/cs?searchtype=author&amp;query=Kothari%2C+P">Parth Kothari</a>, <a href="/search/cs?searchtype=author&amp;query=Perozzi%2C+B">Bryan Perozzi</a>, <a href="/search/cs?searchtype=author&amp;query=Bharel%2C+M">Monica Bharel</a> , et al. (9 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.07207v2-abstract-short" style="display: inline;"> Supporting the health and well-being of dynamic populations around the world requires governmental agencies, organizations and researchers to understand and reason over complex relationships between human behavior and local contexts in order to identify high-risk groups and strategically allocate limited resources. Traditional approaches to these classes of problems often entail developing manuall&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07207v2-abstract-full').style.display = 'inline'; document.getElementById('2411.07207v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.07207v2-abstract-full" style="display: none;"> Supporting the health and well-being of dynamic populations around the world requires governmental agencies, organizations and researchers to understand and reason over complex relationships between human behavior and local contexts in order to identify high-risk groups and strategically allocate limited resources. Traditional approaches to these classes of problems often entail developing manually curated, task-specific features and models to represent human behavior and the natural and built environment, which can be challenging to adapt to new, or even, related tasks. To address this, we introduce a Population Dynamics Foundation Model (PDFM) that aims to capture the relationships between diverse data modalities and is applicable to a broad range of geospatial tasks. We first construct a geo-indexed dataset for postal codes and counties across the United States, capturing rich aggregated information on human behavior from maps, busyness, and aggregated search trends, and environmental factors such as weather and air quality. We then model this data and the complex relationships between locations using a graph neural network, producing embeddings that can be adapted to a wide range of downstream tasks using relatively simple models. We evaluate the effectiveness of our approach by benchmarking it on 27 downstream tasks spanning three distinct domains: health indicators, socioeconomic factors, and environmental measurements. The approach achieves state-of-the-art performance on all 27 geospatial interpolation tasks, and on 25 out of the 27 extrapolation and super-resolution tasks. 
We combined the PDFM with a state-of-the-art forecasting foundation model, TimesFM, to predict unemployment and poverty, achieving performance that surpasses fully supervised forecasting. The full set of embeddings and sample code are publicly available for researchers. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07207v2-abstract-full').style.display = 'none'; document.getElementById('2411.07207v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">28 pages, 16 figures, preprint; v2: updated github url</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.07123">arXiv:2411.07123</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.07123">pdf</a>, <a href="https://arxiv.org/format/2411.07123">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Fast and Robust Contextual Node Representation Learning over Dynamic Graphs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Guo%2C+X">Xingzhi Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+S">Silong Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+B">Baojian Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Xiao%2C+Y">Yanghua Xiao</a>, <a href="/search/cs?searchtype=author&amp;query=Skiena%2C+S">Steven Skiena</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.07123v1-abstract-short" style="display: inline;"> Real-world graphs grow rapidly with edge and vertex insertions over time, motivating the problem of efficiently maintaining robust node representation over evolving graphs. Recent efficient GNNs are designed to decouple recursive message passing from the learning process, and favor Personalized PageRank (PPR) as the underlying feature propagation mechanism. However, most PPR-based GNNs are designe&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07123v1-abstract-full').style.display = 'inline'; document.getElementById('2411.07123v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.07123v1-abstract-full" style="display: none;"> Real-world graphs grow rapidly with edge and vertex insertions over time, motivating the problem of efficiently maintaining robust node representation over evolving graphs. Recent efficient GNNs are designed to decouple recursive message passing from the learning process, and favor Personalized PageRank (PPR) as the underlying feature propagation mechanism. 
However, most PPR-based GNNs are designed for static graphs, and efficient PPR maintenance remains an open problem. Further, there is surprisingly little theoretical justification for the choice of PPR, despite its impressive empirical performance. In this paper, we are inspired by the recent PPR formulation as an explicit $\ell_1$-regularized optimization problem and propose a unified dynamic graph learning framework based on sparse node-wise attention. We also present a set of desired properties that justify the choice of PPR in state-of-the-art GNNs and serve as a guideline for future node attention designs. Meanwhile, we take advantage of the PPR-equivalent optimization formulation and employ the proximal gradient method (ISTA) to improve the efficiency of PPR-based GNNs by up to 6 times. Finally, we instantiate a simple-yet-effective model (\textsc{GoPPE}) with robust positional encodings by maximizing PPR previously used as attention. The model performs comparably to or better than the state-of-the-art baselines and greatly outperforms them when the initial node attributes are noisy during graph evolution, demonstrating the effectiveness and robustness of \textsc{GoPPE}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07123v1-abstract-full').style.display = 'none'; document.getElementById('2411.07123v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.05400">arXiv:2411.05400</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.05400">pdf</a>, <a href="https://arxiv.org/format/2411.05400">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> </div> </div> <p class="title is-5 mathjax"> Palermo: Improving the Performance of Oblivious Memory using Protocol-Hardware Co-Design </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Ye%2C+H">Haojie Ye</a>, <a href="/search/cs?searchtype=author&amp;query=Xia%2C+Y">Yuchen Xia</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Y">Yuhan Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+K">Kuan-Yu Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Yuan%2C+Y">Yichao Yuan</a>, <a href="/search/cs?searchtype=author&amp;query=Deng%2C+S">Shuwen Deng</a>, <a href="/search/cs?searchtype=author&amp;query=Kasikci%2C+B">Baris Kasikci</a>, <a href="/search/cs?searchtype=author&amp;query=Mudge%2C+T">Trevor Mudge</a>, <a href="/search/cs?searchtype=author&amp;query=Talati%2C+N">Nishil Talati</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.05400v1-abstract-short" style="display: inline;"> Oblivious RAM (ORAM) hides the memory access patterns, enhancing data privacy by preventing attackers from discovering sensitive information based on the sequence of memory accesses. 
The performance of ORAM is often limited by its inherent trade-off between security and efficiency, as concealing memory access patterns imposes significant computational and memory overhead. While prior works focus o&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05400v1-abstract-full').style.display = 'inline'; document.getElementById('2411.05400v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.05400v1-abstract-full" style="display: none;"> Oblivious RAM (ORAM) hides the memory access patterns, enhancing data privacy by preventing attackers from discovering sensitive information based on the sequence of memory accesses. The performance of ORAM is often limited by its inherent trade-off between security and efficiency, as concealing memory access patterns imposes significant computational and memory overhead. While prior works focus on improving ORAM performance by prefetching and eliminating ORAM requests, we find that their performance is very sensitive to workload locality behavior and that they incur additional management overhead caused by ORAM stash pressure. This paper presents Palermo: a protocol-hardware co-design to improve ORAM performance. The key observation in Palermo is that classical ORAM protocols enforce restrictive dependencies between memory operations that result in low memory bandwidth utilization. Palermo introduces a new protocol that overlaps large portions of memory operations, within a single and between multiple ORAM requests, without breaking correctness and security guarantees. Subsequently, we propose an ORAM controller architecture that executes the proposed protocol to service ORAM requests. The hardware is responsible for concurrently issuing memory requests as well as imposing the necessary dependencies to ensure a consistent view of the ORAM tree across requests. Using a rich workload mix, we demonstrate that Palermo outperforms the RingORAM baseline by 2.8x, on average, incurring a negligible area overhead of 5.78mm^2 (less than 2% of a 12th-generation Intel CPU after technology scaling) and 2.14W without sacrificing security. We further show that Palermo also outperforms the state-of-the-art works PageORAM, PrORAM, and IR-ORAM. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05400v1-abstract-full').style.display = 'none'; document.getElementById('2411.05400v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
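<p>For context on what Palermo overlaps: in classical tree-based ORAMs (the family RingORAM belongs to), every logical access touches one bucket per level along a root-to-leaf path of the ORAM tree, and those per-level operations are where restrictive dependencies arise. The sketch below computes only that standard baseline access pattern, using the usual heap-style tree layout; nothing in it is specific to Palermo's protocol.</p> <pre><code class="language-python">
# Baseline tree-ORAM access pattern: one request reads every bucket on
# a root-to-leaf path. Heap layout: root is 1, children of i are 2i, 2i+1.
def path_buckets(leaf, height):
    node = 2 ** height + leaf      # index of the leaf-level bucket
    path = []
    while node != 0:
        path.append(node)
        node //= 2                 # climb one level toward the root
    return path                    # buckets a single request must touch

# Example: a height-3 tree has 8 leaves; accessing leaf 5 reads 4 buckets.
assert path_buckets(5, 3) == [13, 6, 3, 1]
</code></pre>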
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear in HPCA&#39;25</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.05346">arXiv:2411.05346</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.05346">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> Reinforcement Learning for Adaptive Resource Scheduling in Complex System Environments </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+P">Pochun Li</a>, <a href="/search/cs?searchtype=author&amp;query=Xiao%2C+Y">Yuyang Xiao</a>, <a href="/search/cs?searchtype=author&amp;query=Yan%2C+J">Jinghua Yan</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+X">Xuan Li</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+X">Xiaoye Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.05346v1-abstract-short" style="display: inline;"> This study presents a novel computer system performance optimization and adaptive workload management scheduling algorithm based on Q-learning. In modern computing environments, characterized by increasing data volumes, task complexity, and dynamic workloads, traditional static scheduling methods such as Round-Robin and Priority Scheduling fail to meet the demands of efficient resource allocation&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05346v1-abstract-full').style.display = 'inline'; document.getElementById('2411.05346v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.05346v1-abstract-full" style="display: none;"> This study presents a novel computer system performance optimization and adaptive workload management scheduling algorithm based on Q-learning. In modern computing environments, characterized by increasing data volumes, task complexity, and dynamic workloads, traditional static scheduling methods such as Round-Robin and Priority Scheduling fail to meet the demands of efficient resource allocation and real-time adaptability. By contrast, Q-learning, a reinforcement learning algorithm, continuously learns from system state changes, enabling dynamic scheduling and resource optimization. Through extensive experiments, the superiority of the proposed approach is demonstrated in both task completion time and resource utilization, outperforming traditional and dynamic resource allocation (DRA) algorithms. These findings are critical as they highlight the potential of intelligent scheduling algorithms based on reinforcement learning to address the growing complexity and unpredictability of computing environments. This research provides a foundation for the integration of AI-driven adaptive scheduling in future large-scale systems, offering a scalable, intelligent solution to enhance system performance, reduce operating costs, and support sustainable energy consumption. 
arXiv:2411.04865 (https://arxiv.org/abs/2411.04865) [pdf, other] cs.CV (Computer Vision and Pattern Recognition); cs.AI (Artificial Intelligence)
ZAHA: Introducing the Level of Facade Generalization and the Large-Scale Point Cloud Facade Semantic Segmentation Benchmark Dataset
Authors: Olaf Wysocki, Yue Tan, Thomas Froech, Yan Xia, Magdalena Wysocki, Ludwig Hoegner, Daniel Cremers, Christoph Holst
Abstract: Facade semantic segmentation is a long-standing challenge in photogrammetry and computer vision. Although the last decades have witnessed the influx of facade segmentation methods, there is a lack of comprehensive facade classes and data covering the architectural variability.
In ZAHA, we introduce the Level of Facade Generalization (LoFG), novel hierarchical facade classes designed based on international urban modeling standards, ensuring compatibility with real-world challenging classes and a uniform comparison of methods. Realizing the LoFG, we present the largest semantic 3D facade segmentation dataset to date, providing 601 million annotated points at five and 15 classes for LoFG2 and LoFG3, respectively. Moreover, we analyze the performance of baseline semantic segmentation methods on our introduced LoFG classes and data, complementing it with a discussion on the unresolved challenges for facade segmentation. We firmly believe that ZAHA shall facilitate further development of 3D facade semantic segmentation methods, enabling robust segmentation indispensable in creating urban digital twins.
Submitted 11 November, 2024; v1 submitted 7 November, 2024; originally announced November 2024.
Comments: Accepted to WACV 2025 (IEEE/CVF Winter Conference on Applications of Computer Vision (WACV))

arXiv:2411.04715 (https://arxiv.org/abs/2411.04715) [pdf] cs.CV (Computer Vision and Pattern Recognition); q-bio.QM (Quantitative Methods)
NeuroFly: A framework for whole-brain single neuron reconstruction
Authors: Rubin Zhao, Yang Liu, Shiqi Zhang, Zijian Yi, Yanyang Xiao, Fang Xu, Yi Yang, Pencheng Zhou
Abstract: Neurons, with their elongated, tree-like dendritic and axonal structures, enable efficient signal integration and long-range communication across brain regions.
By reconstructing individual neurons' morphology, we can gain valuable insights into brain connectivity, revealing the structural basis of cognition, movement, and perception. Despite the accumulation of extensive 3D microscopic imaging data, progress has been considerably hindered by the absence of automated tools to streamline this process. Here we introduce NeuroFly, a validated framework for large-scale automatic single neuron reconstruction. This framework breaks down the process into three distinct stages: segmentation, connection, and proofreading. In the segmentation stage, we perform automatic segmentation followed by skeletonization to generate over-segmented neuronal fragments without branches. During the connection stage, we use a 3D image-based path following approach to extend each fragment and connect it with other fragments of the same neuron. Finally, human annotators are required only to proofread the few unresolved positions. The first two stages of our process are clearly defined computer vision problems, and we have trained robust baseline models to solve them. We validated NeuroFly's efficiency using in-house datasets that include a variety of challenging scenarios, such as dense arborizations, weak axons, and images with contamination. We will release the datasets along with a suite of visualization and annotation tools for better reproducibility. Our goal is to foster collaboration among researchers to address the neuron reconstruction challenge, ultimately accelerating advancements in neuroscience research. The dataset and code are available at https://github.com/beanli161514/neurofly
Submitted 7 November, 2024; originally announced November 2024.
arXiv:2411.03675 (https://arxiv.org/abs/2411.03675) [pdf, other] cs.CL (Computation and Language); cs.AI (Artificial Intelligence)
QUILL: Quotation Generation Enhancement of Large Language Models
Authors: Jin Xiao, Bowei Zhang, Qianyu He, Jiaqing Liang, Feng Wei, Jinglei Chen, Zujie Liang, Deqing Yang, Yanghua Xiao
Abstract: While large language models (LLMs) have become excellent writing assistants, they still struggle with quotation generation. This is because they either hallucinate when providing factual quotations or fail to provide quotes that exceed human expectations. To bridge the gap, we systematically study how to evaluate and improve LLMs' performance on quotation generation tasks. We first establish a holistic and automatic evaluation system for the quotation generation task, which consists of five criteria, each with a corresponding automatic metric. To improve LLMs' quotation generation abilities, we construct a bilingual knowledge base that is broad in scope and rich in dimensions, containing up to 32,022 quotes. Moreover, guided by our criteria, we further design a quotation-specific metric to rerank the retrieved quotations from the knowledge base. Extensive experiments show that our metrics strongly correlate with human preferences. Existing LLMs struggle to generate desired quotes, but our quotation knowledge base and reranking metric help narrow this gap.
Our dataset and code are publicly available at https://github.com/GraceXiaoo/QUILL.
Submitted 6 November, 2024; originally announced November 2024.
Comments: 17 pages, 6 figures

arXiv:2411.03670 (https://arxiv.org/abs/2411.03670) [pdf, other] cs.CV (Computer Vision and Pattern Recognition); cs.AI (Artificial Intelligence)
Touchstone Benchmark: Are We on the Right Way for Evaluating AI Algorithms for Medical Segmentation?
Authors: Pedro R. A. S. Bassi, Wenxuan Li, Yucheng Tang, Fabian Isensee, Zifu Wang, Jieneng Chen, Yu-Cheng Chou, Yannick Kirchhoff, Maximilian Rokuss, Ziyan Huang, Jin Ye, Junjun He, Tassilo Wald, Constantin Ulrich, Michael Baumgartner, Saikat Roy, Klaus H. Maier-Hein, Paul Jaeger, Yiwen Ye, Yutong Xie, Jianpeng Zhang, Ziyang Chen, Yong Xia, Zhaohu Xing, Lei Zhu, et al.
(28 additional authors not shown)
Abstract: How can we test AI performance? This question seems trivial, but it isn't. Standard benchmarks often have problems such as in-distribution and small-size test sets, oversimplified metrics, unfair comparisons, and short-term outcome pressure. As a consequence, good performance on standard benchmarks does not guarantee success in real-world scenarios. To address these problems, we present Touchstone, a large-scale collaborative segmentation benchmark of 9 types of abdominal organs. This benchmark is based on 5,195 training CT scans from 76 hospitals around the world and 5,903 testing CT scans from 11 additional hospitals. This diverse test set enhances the statistical significance of benchmark results and rigorously evaluates AI algorithms across various out-of-distribution scenarios. We invited 14 inventors of 19 AI algorithms to train their algorithms, while our team, as a third party, independently evaluated these algorithms on three test sets. In addition, we also evaluated pre-existing AI frameworks (which, differing from algorithms, are more flexible and can support different algorithms), including MONAI from NVIDIA, nnU-Net from DKFZ, and numerous other open-source frameworks. We are committed to expanding this benchmark to encourage more innovation of AI algorithms for the medical domain.
Submitted 6 November, 2024; originally announced November 2024.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to NeurIPS-2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.03656">arXiv:2411.03656</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.03656">pdf</a>, <a href="https://arxiv.org/format/2411.03656">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Requirements Engineering for Older Adult Digital Health Software: A Systematic Literature Review </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Xiao%2C+Y">Yuqing Xiao</a>, <a href="/search/cs?searchtype=author&amp;query=Grundy%2C+J">John Grundy</a>, <a href="/search/cs?searchtype=author&amp;query=Madugalla%2C+A">Anuradha Madugalla</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.03656v1-abstract-short" style="display: inline;"> Growth of the older adult population has led to an increasing interest in technology-supported aged care. However, the area has some challenges such as a lack of caregivers and limitations in understanding the emotional, social, physical, and mental well-being needs of seniors. Furthermore, there is a gap in the understanding between developers and ageing people of their requirements. Digital heal&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.03656v1-abstract-full').style.display = 'inline'; document.getElementById('2411.03656v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.03656v1-abstract-full" style="display: none;"> Growth of the older adult population has led to an increasing interest in technology-supported aged care. However, the area has some challenges such as a lack of caregivers and limitations in understanding the emotional, social, physical, and mental well-being needs of seniors. Furthermore, there is a gap in the understanding between developers and ageing people of their requirements. Digital health can be important in supporting older adults wellbeing, emotional requirements, and social needs. Requirements Engineering (RE) is a major software engineering field, which can help to identify, elicit and prioritize the requirements of stakeholders and ensure that the systems meet standards for performance, reliability, and usability. We carried out a systematic review of the literature on RE for older adult digital health software. This was necessary to show the representatives of the current stage of understanding the needs of older adults in aged care digital health. 
Using established guidelines outlined by the Kitchenham method and the PRISMA and PICO guidelines, we developed a protocol and then systematically explored eight databases. This resulted in 69 primary studies of high relevance, which were subsequently subjected to data extraction, synthesis, and reporting. We highlight key RE processes in digital health software for ageing people. The review explored the utilization of technology for older users' well-being and care, and the evaluations of such solutions. It also identified key limitations found in existing primary studies that inspire future research opportunities. The results indicate that requirement gathering and understanding vary significantly between different studies. The differences lie in the quality, depth, and techniques adopted for requirement gathering, and are largely due to uneven adoption of RE methods.
Submitted 5 November, 2024; originally announced November 2024.
Comments: arXiv version of SLR on RE for Older Adult Digital Health Software

arXiv:2411.02840 (https://arxiv.org/abs/2411.02840) [pdf, other] cs.CV (Computer Vision and Pattern Recognition)
Test-Time Dynamic Image Fusion
Authors: Bing Cao, Yinan Xia, Yi Ding, Changqing Zhang, Qinghua Hu
Abstract: The inherent challenge of image fusion lies in capturing the correlation of multi-source images and comprehensively integrating effective information from different sources. Most existing techniques fail to perform dynamic image fusion while notably lacking theoretical guarantees, leading to potential deployment risks in this field.
Is it possible to conduct dynamic image fusion with a clear theoretical justification? In this paper, we give our solution from a generalization perspective. We proceed to reveal the generalized form of image fusion and derive a new test-time dynamic image fusion paradigm. It provably reduces the upper bound of the generalization error. Specifically, we decompose the fused image into multiple components corresponding to its source data. The decomposed components represent the effective information from the source data, and the gap between them reflects the Relative Dominability (RD) of the uni-source data in constructing the fusion image. Theoretically, we prove that the key to reducing generalization error hinges on the negative correlation between the RD-based fusion weight and the uni-source reconstruction loss. Intuitively, RD dynamically highlights the dominant regions of each source and can be naturally converted to the corresponding fusion weight, achieving robust results. Extensive experiments and discussions with in-depth analysis on multiple benchmarks confirm our findings and superiority. Our code is available at https://github.com/Yinan-Xia/TTD.
Submitted 5 November, 2024; originally announced November 2024.
Comments: Accepted by NeurIPS 2024
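The central relationship above, fusion weights negatively correlated with each source's reconstruction loss, can be illustrated with a toy sketch; the softmax(-loss) weighting below is an illustrative stand-in, not necessarily the paper's exact Relative Dominability computation.

```python
# Toy sketch: the source that is reconstructed better (lower loss,
# i.e. more "dominant") receives a larger fusion weight.
import numpy as np

def fusion_weights(recon_losses, temperature=1.0):
    """Per-source weights; lower loss -> higher weight."""
    z = -np.asarray(recon_losses, dtype=float) / temperature
    z -= z.max()                        # numerical stability
    w = np.exp(z)
    return w / w.sum()

def fuse(sources, recon_losses):
    w = fusion_weights(recon_losses)
    return sum(wi * s for wi, s in zip(w, sources))

a, b = np.full((2, 2), 1.0), np.full((2, 2), 3.0)
print(fuse([a, b], recon_losses=[0.2, 1.5]))  # fused image pulled toward a
```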
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by NeurIPS 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.02461">arXiv:2411.02461</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.02461">pdf</a>, <a href="https://arxiv.org/format/2411.02461">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Enhancing Multiple Dimensions of Trustworthiness in LLMs via Sparse Activation Control </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Xiao%2C+Y">Yuxin Xiao</a>, <a href="/search/cs?searchtype=author&amp;query=Wan%2C+C">Chaoqun Wan</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Y">Yonggang Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+W">Wenxiao Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Lin%2C+B">Binbin Lin</a>, <a href="/search/cs?searchtype=author&amp;query=He%2C+X">Xiaofei He</a>, <a href="/search/cs?searchtype=author&amp;query=Shen%2C+X">Xu Shen</a>, <a href="/search/cs?searchtype=author&amp;query=Ye%2C+J">Jieping Ye</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.02461v1-abstract-short" style="display: inline;"> As the development and application of Large Language Models (LLMs) continue to advance rapidly, enhancing their trustworthiness and aligning them with human preferences has become a critical area of research. Traditional methods rely heavily on extensive data for Reinforcement Learning from Human Feedback (RLHF), but representation engineering offers a new, training-free approach. This technique l&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02461v1-abstract-full').style.display = 'inline'; document.getElementById('2411.02461v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.02461v1-abstract-full" style="display: none;"> As the development and application of Large Language Models (LLMs) continue to advance rapidly, enhancing their trustworthiness and aligning them with human preferences has become a critical area of research. Traditional methods rely heavily on extensive data for Reinforcement Learning from Human Feedback (RLHF), but representation engineering offers a new, training-free approach. This technique leverages semantic features to control the representation of LLM&#39;s intermediate hidden states, enabling the model to meet specific requirements such as increased honesty or heightened safety awareness. However, a significant challenge arises when attempting to fulfill multiple requirements simultaneously. It proves difficult to encode various semantic contents, like honesty and safety, into a singular semantic feature, restricting its practicality. In this work, we address this issue through ``Sparse Activation Control&#39;&#39;. 
By delving into the intrinsic mechanisms of LLMs, we manage to identify and pinpoint components that are closely related to specific tasks within the model, i.e., attention heads. These heads display sparse characteristics that allow for near-independent control over different tasks. Our experiments, conducted on the open-source Llama series models, have yielded encouraging results. The models were able to align with human preferences on issues of safety, factuality, and bias concurrently.
Submitted 4 November, 2024; originally announced November 2024.
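A schematic of head-level control of this kind: steer only the activations of heads tied to one task, leaving other heads untouched. Head indices and the steering direction below are placeholders; the paper's procedure for identifying task-relevant heads is not reproduced here.

```python
# Sketch: nudge the outputs of a sparse set of attention heads with a
# task-specific direction; disjoint head sets for different tasks give
# near-independent multi-task control.
import numpy as np

def steer_heads(head_outputs, head_ids, direction, alpha=1.0):
    """head_outputs: (num_heads, d_head) activations at one layer."""
    out = head_outputs.copy()
    for h in head_ids:
        out[h] += alpha * direction     # modify only the selected heads
    return out

num_heads, d_head = 8, 4
acts = np.random.randn(num_heads, d_head)
honesty_dir = np.random.randn(d_head)   # in practice found empirically
steered = steer_heads(acts, head_ids=[2, 5], direction=honesty_dir)
```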
href="/search/cs?searchtype=author&amp;query=Wu%2C+K">Kan Wu</a> , et al. (83 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.02265v3-abstract-short" style="display: inline;"> In this paper, we introduce Hunyuan-Large, which is currently the largest open-source Transformer-based mixture of experts model, with a total of 389 billion parameters and 52 billion activation parameters, capable of handling up to 256K tokens. We conduct a thorough evaluation of Hunyuan-Large&#39;s superior performance across various benchmarks including language understanding and generation, logica&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02265v3-abstract-full').style.display = 'inline'; document.getElementById('2411.02265v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.02265v3-abstract-full" style="display: none;"> In this paper, we introduce Hunyuan-Large, which is currently the largest open-source Transformer-based mixture of experts model, with a total of 389 billion parameters and 52 billion activation parameters, capable of handling up to 256K tokens. We conduct a thorough evaluation of Hunyuan-Large&#39;s superior performance across various benchmarks including language understanding and generation, logical reasoning, mathematical problem-solving, coding, long-context, and aggregated tasks, where it outperforms LLama3.1-70B and exhibits comparable performance when compared to the significantly larger LLama3.1-405B model. Key practice of Hunyuan-Large include large-scale synthetic data that is orders larger than in previous literature, a mixed expert routing strategy, a key-value cache compression technique, and an expert-specific learning rate strategy. Additionally, we also investigate the scaling laws and learning rate schedule of mixture of experts models, providing valuable insights and guidances for future model development and optimization. The code and checkpoints of Hunyuan-Large are released to facilitate future innovations and applications. Codes: https://github.com/Tencent/Hunyuan-Large Models: https://huggingface.co/tencent/Tencent-Hunyuan-Large <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02265v3-abstract-full').style.display = 'none'; document.getElementById('2411.02265v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages, 4 Figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.01679">arXiv:2411.01679</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.01679">pdf</a>, <a href="https://arxiv.org/format/2411.01679">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Autoformulation of Mathematical Optimization Models Using LLMs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Astorga%2C+N">Nicol谩s Astorga</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+T">Tennison Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Xiao%2C+Y">Yuanzhang Xiao</a>, <a href="/search/cs?searchtype=author&amp;query=van+der+Schaar%2C+M">Mihaela van der Schaar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.01679v1-abstract-short" style="display: inline;"> Mathematical optimization is fundamental to decision-making across diverse domains, from operations research to healthcare. Yet, translating real-world problems into optimization models remains a formidable challenge, often demanding specialized expertise. This paper formally introduces the concept of $\textbf{autoformulation}$ -- an automated approach to creating optimization models from natural&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.01679v1-abstract-full').style.display = 'inline'; document.getElementById('2411.01679v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.01679v1-abstract-full" style="display: none;"> Mathematical optimization is fundamental to decision-making across diverse domains, from operations research to healthcare. Yet, translating real-world problems into optimization models remains a formidable challenge, often demanding specialized expertise. This paper formally introduces the concept of $\textbf{autoformulation}$ -- an automated approach to creating optimization models from natural language descriptions for commercial solvers. We identify the three core challenges of autoformulation: (1) defining the vast, problem-dependent hypothesis space, (2) efficiently searching this space under uncertainty, and (3) evaluating formulation correctness (ensuring a formulation accurately represents the problem). To address these challenges, we introduce a novel method leveraging $\textit{Large Language Models}$ (LLMs) within a $\textit{Monte-Carlo Tree Search}$ framework. This approach systematically explores the space of possible formulations by exploiting the hierarchical nature of optimization modeling. LLMs serve two key roles: as dynamic formulation hypothesis generators and as evaluators of formulation correctness. To enhance search efficiency, we introduce a pruning technique to remove trivially equivalent formulations. Empirical evaluations across benchmarks containing linear and mixed-integer programming problems demonstrate our method&#39;s superior performance. 
arXiv:2411.01174 (https://arxiv.org/abs/2411.01174) [pdf, other] eess.AS (Audio and Speech Processing); cs.SD (Sound)
Leveraging LLM and Text-Queried Separation for Noise-Robust Sound Event Detection
Authors: Han Yin, Yang Xiao, Jisheng Bai, Rohan Kumar Das
Abstract: Sound Event Detection (SED) is challenging in noisy environments where overlapping sounds obscure target events. Language-queried audio source separation (LASS) aims to isolate the target sound events from a noisy clip. However, this approach can fail when the exact target sound is unknown, particularly in noisy test sets, leading to reduced performance. To address this issue, we leverage the capabilities of large language models (LLMs) to analyze and summarize acoustic data. By using LLMs to identify and select specific noise types, we implement a noise augmentation method for noise-robust fine-tuning. The fine-tuned model is applied to predict clip-wise event predictions as text queries for the LASS model. Our studies demonstrate that the proposed method improves SED performance in noisy environments. This work represents an early application of LLMs in noise-robust SED and suggests a promising direction for handling overlapping events in SED. Codes and pretrained models are available at https://github.com/apple-yinhan/Noise-robust-SED.
Submitted 2 November, 2024; originally announced November 2024.
Comments: Submitted to ICASSP 2025 Workshop
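Noise augmentation of this kind typically mixes a noise clip into the training audio at a target signal-to-noise ratio; a minimal sketch follows. Which noise types to mix is what the paper delegates to the LLM, and that selection step is not shown here.

```python
# Minimal noise-mixing augmentation at a target SNR (in dB).
import numpy as np

def mix_at_snr(clean, noise, snr_db):
    """Scale `noise` so the clean/noise power ratio equals snr_db."""
    p_clean = np.mean(clean ** 2)
    p_noise = np.mean(noise ** 2) + 1e-12
    scale = np.sqrt(p_clean / (p_noise * 10 ** (snr_db / 10)))
    return clean + scale * noise

sr = 16000
t = np.linspace(0, 1, sr, endpoint=False)
event = 0.5 * np.sin(2 * np.pi * 440 * t)      # stand-in "sound event"
noise = 0.1 * np.random.randn(sr)              # stand-in noise clip
noisy = mix_at_snr(event, noise, snr_db=5.0)   # 5 dB SNR training example
```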
arXiv:2410.23703 (https://arxiv.org/abs/2410.23703) [pdf, other] cs.LG (Machine Learning); cs.CL (Computation and Language)
OCEAN: Offline Chain-of-thought Evaluation and Alignment in Large Language Models
Authors: Junda Wu, Xintong Li, Ruoyu Wang, Yu Xia, Yuxin Xiong, Jianing Wang, Tong Yu, Xiang Chen, Branislav Kveton, Lina Yao, Jingbo Shang, Julian McAuley
Abstract: Offline evaluation of LLMs is crucial in understanding their capacities, though current methods remain underexplored in existing research. In this work, we focus on the offline evaluation of chain-of-thought capabilities and show how to optimize LLMs based on the proposed evaluation method.
To enable offline feedback with rich knowledge and reasoning paths, we use knowledge graphs (e.g., Wikidata5m) to provide feedback on the generated chains of thought. Due to the heterogeneity between LLM reasoning and KG structures, direct interaction and feedback from KGs on LLM behavior are challenging, as they require accurate entity linking and grounding of LLM-generated chains of thought in the KG. To address this challenge, we propose an offline chain-of-thought evaluation framework, OCEAN, which models chain-of-thought reasoning in LLMs as an MDP and evaluates the policy's alignment with KG preference modeling. To overcome the reasoning heterogeneity and grounding problems, we leverage on-policy KG exploration and reinforcement learning to model a KG policy that generates token-level likelihood distributions for LLM-generated chain-of-thought reasoning paths, simulating KG reasoning preference. Then we incorporate the knowledge-graph feedback on the validity and alignment of the generated reasoning paths into inverse propensity scores and propose the KG-IPS estimator. Theoretically, we prove the unbiasedness of the proposed KG-IPS estimator and provide a lower bound on its variance. With the off-policy evaluated value function, we can directly enable off-policy optimization to further enhance chain-of-thought alignment. Our empirical study shows that OCEAN can be efficiently optimized for generating chain-of-thought reasoning paths with higher estimated values without affecting LLMs' general abilities in downstream tasks or their internal knowledge.
Submitted 31 October, 2024; originally announced October 2024.
Comments: 10 pages
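KG-IPS builds on the textbook inverse-propensity-scoring estimator, which reweights logged rewards by the ratio of target-policy to logging-policy probabilities. A minimal sketch of that statistical core; the KG-derived rewards and the policies themselves are beyond this snippet.

```python
# Textbook IPS estimator: an unbiased off-policy value estimate,
# assuming the logging probabilities are known, correct, and nonzero.
import numpy as np

def ips_estimate(rewards, logged_probs, target_probs):
    w = np.asarray(target_probs) / np.asarray(logged_probs)
    return float(np.mean(w * np.asarray(rewards)))

rewards      = np.array([1.0, 0.0, 1.0, 1.0])
logged_probs = np.array([0.5, 0.4, 0.25, 0.5])   # behavior (logging) policy
target_probs = np.array([0.6, 0.1, 0.50, 0.5])   # policy being evaluated
print(ips_estimate(rewards, logged_probs, target_probs))
```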
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.21634">arXiv:2410.21634</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.21634">pdf</a>, <a href="https://arxiv.org/format/2410.21634">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> </div> </div> <p class="title is-5 mathjax"> Faster Local Solvers for Graph Diffusion Equations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Bai%2C+J">Jiahe Bai</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+B">Baojian Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+D">Deqing Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Xiao%2C+Y">Yanghua Xiao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.21634v1-abstract-short" style="display: inline;"> Efficient computation of graph diffusion equations (GDEs), such as Personalized PageRank, Katz centrality, and the Heat kernel, is crucial for clustering, training neural networks, and many other graph-related problems. Standard iterative methods require accessing the whole graph per iteration, making them time-consuming for large-scale graphs. While existing local solvers approximate diffusion ve&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.21634v1-abstract-full').style.display = 'inline'; document.getElementById('2410.21634v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.21634v1-abstract-full" style="display: none;"> Efficient computation of graph diffusion equations (GDEs), such as Personalized PageRank, Katz centrality, and the Heat kernel, is crucial for clustering, training neural networks, and many other graph-related problems. Standard iterative methods require accessing the whole graph per iteration, making them time-consuming for large-scale graphs. While existing local solvers approximate diffusion vectors through heuristic local updates, they often operate sequentially and are typically designed for specific diffusion types, limiting their applicability. Given that diffusion vectors are highly localizable, as measured by the participation ratio, this paper introduces a novel framework for approximately solving GDEs using a local diffusion process. This framework reveals the suboptimality of existing local solvers. Furthermore, our approach effectively localizes standard iterative solvers by designing simple and provably sublinear time algorithms. These new local solvers are highly parallelizable, making them well-suited for implementation on GPUs. We demonstrate the effectiveness of our framework in quickly obtaining approximate diffusion vectors, achieving up to a hundred-fold speed improvement, and its applicability to large-scale dynamic graphs. Our framework could also facilitate more efficient local message-passing mechanisms for GNNs. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.21634v1-abstract-full').style.display = 'none'; document.getElementById('2410.21634v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NeurIPS 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.20011">arXiv:2410.20011</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.20011">pdf</a>, <a href="https://arxiv.org/format/2410.20011">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> A Survey of Small Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Van+Nguyen%2C+C">Chien Van Nguyen</a>, <a href="/search/cs?searchtype=author&amp;query=Shen%2C+X">Xuan Shen</a>, <a href="/search/cs?searchtype=author&amp;query=Aponte%2C+R">Ryan Aponte</a>, <a href="/search/cs?searchtype=author&amp;query=Xia%2C+Y">Yu Xia</a>, <a href="/search/cs?searchtype=author&amp;query=Basu%2C+S">Samyadeep Basu</a>, <a href="/search/cs?searchtype=author&amp;query=Hu%2C+Z">Zhengmian Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+J">Jian Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Parmar%2C+M">Mihir Parmar</a>, <a href="/search/cs?searchtype=author&amp;query=Kunapuli%2C+S">Sasidhar Kunapuli</a>, <a href="/search/cs?searchtype=author&amp;query=Barrow%2C+J">Joe Barrow</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+J">Junda Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A">Ashish Singh</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Yu Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Gu%2C+J">Jiuxiang Gu</a>, <a href="/search/cs?searchtype=author&amp;query=Dernoncourt%2C+F">Franck Dernoncourt</a>, <a href="/search/cs?searchtype=author&amp;query=Ahmed%2C+N+K">Nesreen K. Ahmed</a>, <a href="/search/cs?searchtype=author&amp;query=Lipka%2C+N">Nedim Lipka</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+R">Ruiyi Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+X">Xiang Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Yu%2C+T">Tong Yu</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+S">Sungchul Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Deilamsalehy%2C+H">Hanieh Deilamsalehy</a>, <a href="/search/cs?searchtype=author&amp;query=Park%2C+N">Namyong Park</a>, <a href="/search/cs?searchtype=author&amp;query=Rimer%2C+M">Mike Rimer</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Z">Zhehao Zhang</a> , et al. 
(3 additional authors not shown)
Abstract: Small Language Models (SLMs) have become increasingly important due to their efficiency and strong performance on various language tasks with minimal computational resources, making them ideal for many settings, including on-device, mobile, and edge deployments. In this article, we present a comprehensive survey on SLMs, focusing on their architectures, training techniques, and model compression techniques. We propose a novel taxonomy for categorizing the methods used to optimize SLMs, including model compression, pruning, and quantization. We summarize the benchmark datasets and evaluation metrics commonly used for SLMs. Additionally, we highlight key open challenges that remain to be addressed. Our survey aims to serve as a valuable resource for researchers and practitioners interested in developing and deploying small yet efficient language models.
Submitted 25 October, 2024; originally announced October 2024.
arXiv:2410.19795 [pdf, other] cs.DC cs.NI
SANSee: A Physical-layer Semantic-aware Networking Framework for Distributed Wireless Sensing
Authors: Huixiang Zhu, Yong Xiao, Yingyu Li, Guangming Shi, Marwan Krunz
Abstract: Contactless device-free wireless sensing has recently attracted significant interest due to its potential to support a wide range of immersive human-machine interactive applications using ubiquitously available radio frequency (RF) signals. Traditional approaches focus on developing a single global model based on a combined dataset collected from different locations. However, wireless signals are known to be location and environment specific. Thus, a global model results in inconsistent and unreliable sensing results. It is also unrealistic to construct individual models for all the possible locations and environmental scenarios. Motivated by the observation that signals recorded at different locations are closely related to a set of physical-layer semantic features, in this paper we propose SANSee, a semantic-aware networking-based framework for distributed wireless sensing. SANSee allows models constructed in one or a limited number of locations to be transferred to new locations without requiring any locally labeled data or model training. SANSee is built on the concept of physical-layer semantic-aware network (pSAN), which characterizes the semantic similarity and the correlations of sensed data across different locations. A pSAN-based zero-shot transfer learning solution is introduced to allow receivers in new locations to obtain location-specific models by directly aggregating the models trained by other receivers. We theoretically prove that models obtained by SANSee can approach the locally optimal models. Experimental results based on real-world datasets verify that the accuracy of the transferred models obtained by SANSee matches that of models trained on locally labeled data with supervised learning.
Submitted 15 October, 2024; originally announced October 2024.
Comments: accepted at IEEE Transactions on Mobile Computing
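The aggregation step can be pictured as similarity-weighted model averaging; a toy sketch under that assumption (the cosine metric and softmax weighting are illustrative choices, not the paper's pSAN construction):

    import numpy as np

    def aggregate_for_new_location(new_feat, source_feats, source_params, temp=0.1):
        """Combine source receivers' model parameters, weighted by how similar their
        physical-layer semantic features are to the new location's features."""
        def cos(a, b):
            return float(a @ b / (np.linalg.norm(a) * np.linalg.norm(b)))
        sims = np.array([cos(new_feat, f) for f in source_feats])
        w = np.exp(sims / temp)
        w /= w.sum()                  # softmax over semantic similarities
        return sum(wi * theta for wi, theta in zip(w, source_params))

The point of the zero-shot setup is visible here: the new receiver needs only its own semantic features and the already-trained source models, no local labels or training.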
arXiv:2410.19786 [pdf, other] cs.CV
Resolution Enhancement of Under-sampled Photoacoustic Microscopy Images using Implicit Neural Representations
Authors: Youshen Xiao, Sheng Liao, Xuanyang Tian, Fan Zhang, Xinlong Dong, Yunhui Jiang, Xiyu Chen, Ruixi Sun, Yuyao Zhang, Fei Gao
Abstract: Acoustic-Resolution Photoacoustic Microscopy (AR-PAM) is promising for subcutaneous vascular imaging, but its spatial resolution is constrained by the Point Spread Function (PSF). Traditional deconvolution methods like Richardson-Lucy and model-based deconvolution use the PSF to improve resolution. However, accurately measuring the PSF is difficult, leading to reliance on less accurate blind deconvolution techniques. Additionally, AR-PAM suffers from long scanning times, which can be reduced via down-sampling, but this necessitates effective image recovery from under-sampled data, a task where traditional interpolation methods fall short, particularly at high under-sampling rates. To address these challenges, we propose an approach based on Implicit Neural Representations (INR). This method learns a continuous mapping from spatial coordinates to initial acoustic pressure, overcoming the limitations of discrete imaging and enhancing AR-PAM's resolution. By treating the PSF as a learnable parameter within the INR framework, our technique mitigates inaccuracies associated with PSF estimation. We evaluated our method on simulated vascular data, showing significant improvements in Peak Signal-to-Noise Ratio (PSNR) and Structural Similarity Index (SSIM) over conventional methods. Qualitative enhancements were also observed in leaf vein and in vivo mouse brain microvasculature images. When applied to a custom AR-PAM system, experiments with pencil lead demonstrated that our method delivers sharper, higher-resolution results, indicating its potential to advance photoacoustic microscopy.
Submitted 14 October, 2024; originally announced October 2024.
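A minimal PyTorch sketch of the two ingredients the abstract names, a coordinate network for the pressure map plus a PSF treated as a learnable parameter (layer sizes, activations, and the forward blur model are assumptions for illustration):

    import torch
    import torch.nn as nn
    import torch.nn.functional as F

    class PressureINR(nn.Module):
        def __init__(self, hidden=256, psf_size=9):
            super().__init__()
            self.net = nn.Sequential(          # (x, y) -> initial acoustic pressure
                nn.Linear(2, hidden), nn.ReLU(),
                nn.Linear(hidden, hidden), nn.ReLU(),
                nn.Linear(hidden, 1),
            )
            # PSF as a learnable parameter, sidestepping explicit PSF measurement
            self.psf = nn.Parameter(1e-2 * torch.randn(1, 1, psf_size, psf_size))

        def forward(self, coords, height, width):
            pressure = self.net(coords).view(1, 1, height, width)
            # Normalize the kernel so the blur cannot rescale total intensity.
            psf = torch.softmax(self.psf.flatten(), 0).view_as(self.psf)
            return F.conv2d(pressure, psf, padding=self.psf.shape[-1] // 2)

Training would fit the blurred output to the under-sampled measurements at their coordinates; because the network is continuous in (x, y), it can then be queried on a denser grid than was actually scanned.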
arXiv:2410.17043 [pdf, other] cs.LG cs.NI
Optimizing Mixture-of-Experts Inference Time Combining Model Deployment and Communication Scheduling
Authors: Jialong Li, Shreyansh Tripathi, Lakshay Rastogi, Yiming Lei, Rui Pan, Yiting Xia
Abstract: As machine learning models scale in size and complexity, their computational requirements become a significant barrier. Mixture-of-Experts (MoE) models alleviate this issue by selectively activating relevant experts. Despite this, MoE models are hindered by high communication overhead from all-to-all operations, low GPU utilization due to the synchronous communication constraint, and complications from heterogeneous GPU environments. This paper presents Aurora, which optimizes both model deployment and all-to-all communication scheduling to address these challenges in MoE inference. Aurora achieves minimal communication times by strategically ordering token transmissions in all-to-all communications. It improves GPU utilization by colocating experts from different models on the same device, avoiding the limitations of synchronous all-to-all communication. We analyze Aurora's optimization strategies theoretically across four common GPU cluster settings: exclusive vs. colocated models on GPUs, and homogeneous vs. heterogeneous GPUs. Aurora provides optimal solutions for three cases, and for the remaining NP-hard scenario, it offers a polynomial-time sub-optimal solution with only a 1.07x degradation from the optimal. Aurora is the first approach to minimize MoE inference time via optimal model deployment and communication scheduling across various scenarios. Evaluations demonstrate that Aurora significantly accelerates inference, achieving speedups of up to 2.38x in homogeneous clusters and 3.54x in heterogeneous environments. Moreover, Aurora enhances GPU utilization by up to 1.5x compared to existing methods.
Submitted 22 October, 2024; originally announced October 2024.
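As a toy illustration of load-aware transmission ordering (not Aurora's provably optimal schedule), one can serve the most congested destination links first so that no single link is left with a long tail of pending tokens:

    def order_transfers(demands):
        """demands: {(src_gpu, dst_gpu): num_tokens}. Returns a transmission order
        that sends tokens bound for the most loaded destination links first."""
        load = {}
        for (_, dst), tokens in demands.items():
            load[dst] = load.get(dst, 0) + tokens
        return sorted(demands, key=lambda pair: (-load[pair[1]], -demands[pair]))

    demo = {(0, 1): 700, (0, 2): 100, (1, 2): 300, (2, 1): 50}
    print(order_transfers(demo))   # transfers into GPU 1 (750 tokens total) go first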
arXiv:2410.17012 [pdf, other] cs.NI
Nanosecond Precision Time Synchronization for Optical Data Center Networks
Authors: Yiming Lei, Jialong Li, Zhengqing Liu, Raj Joshi, Yiting Xia
Abstract: Optical data center networks (DCNs) are renovating the infrastructure design for the cloud in the post Moore's law era. The fact that optical DCNs rely on optical circuits of microsecond-scale durations makes nanosecond-precision time synchronization essential for the correct functioning of routing on the network fabric. However, current studies on optical DCNs neglect the fundamental need for accurate time synchronization. In this paper, we bridge the gap by developing Nanosecond Optical Synchronization (NOS), the first nanosecond-precision synchronization solution for optical DCNs general to various optical hardware. NOS builds clock propagation trees on top of the dynamically reconfigured circuits in optical DCNs, allowing switches to seek better sync parents throughout time. It predicts drifts in the tree-building process, which enables minimization of sync errors. We also tailor today's sync protocols to the needs of optical DCNs, including reducing the number of sync messages to fit into short circuit durations and correcting timestamp errors for higher sync accuracy. Our implementation on programmable switches shows 28ns sync accuracy in a 192-ToR setting.
Submitted 22 October, 2024; originally announced October 2024.
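The timestamp math underneath any such protocol is the classic two-way exchange; a sketch of the offset estimate plus a linear drift predictor of the kind the abstract alludes to (the least-squares fit is an assumed modeling choice):

    import numpy as np

    def two_way_offset(t1, t2, t3, t4):
        """PTP-style two-way exchange: t1 = request sent, t2 = request received,
        t3 = reply sent, t4 = reply received. Assumes symmetric path delays."""
        return ((t2 - t1) - (t4 - t3)) / 2.0

    def drift_predictor(sync_times, offsets):
        """Fit offset(t) = a*t + b so a switch can extrapolate its clock correction
        between the short windows when a circuit to its sync parent exists."""
        a, b = np.polyfit(sync_times, offsets, 1)
        return lambda t: a * t + b

Predicting drift this way matters in optical DCNs precisely because a switch cannot reach its sync parent continuously, only during brief circuit windows.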
arXiv:2410.16512 [pdf, other] cs.CV
TIPS: Text-Image Pretraining with Spatial Awareness
Authors: Kevis-Kokitsi Maninis, Kaifeng Chen, Soham Ghosh, Arjun Karpur, Koert Chen, Ye Xia, Bingyi Cao, Daniel Salz, Guangxing Han, Jan Dlabal, Dan Gnanapragasam, Mojtaba Seyedhosseini, Howard Zhou, Andre Araujo
Abstract: While image-text representation learning has become very popular in recent years, existing models tend to lack spatial awareness and have limited direct applicability for dense understanding tasks. For this reason, self-supervised image-only pretraining is still the go-to method for many dense vision applications (e.g., depth estimation, semantic segmentation), despite the lack of explicit supervisory signals. In this paper, we close this gap between image-text and self-supervised learning by proposing a novel general-purpose image-text model, which can be effectively used off-the-shelf for dense and global vision tasks. Our method, which we refer to as Text-Image Pretraining with Spatial awareness (TIPS), leverages two simple and effective insights. First, on textual supervision: we reveal that replacing noisy web image captions with synthetically generated textual descriptions boosts dense understanding performance significantly, due to a much richer signal for learning spatially aware representations. We propose an adapted training method that combines noisy and synthetic captions, resulting in improvements across both dense and global understanding tasks. Second, on the learning technique: we propose to combine contrastive image-text learning with self-supervised masked image modeling, to encourage spatial coherence, unlocking substantial enhancements for downstream applications. Building on these two ideas, we scale our model using the transformer architecture, trained on a curated set of public images. Our experiments are conducted on 8 tasks involving 16 datasets in total, demonstrating strong off-the-shelf performance on both dense and global understanding, for several image-only and image-text tasks.
Submitted 21 October, 2024; originally announced October 2024.
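The two learning signals combine naturally into one objective; a hedged PyTorch sketch (the temperature, loss weighting, and MSE reconstruction target are illustrative choices, not the paper's exact recipe):

    import torch
    import torch.nn.functional as F

    def tips_style_loss(img_emb, txt_emb, pred_patches, true_patches, mask, mim_weight=1.0):
        """Contrastive image-text loss plus a masked image modeling loss."""
        img = F.normalize(img_emb, dim=-1)
        txt = F.normalize(txt_emb, dim=-1)
        logits = img @ txt.t() / 0.07                      # scaled cosine similarities
        labels = torch.arange(img.size(0), device=img.device)
        contrastive = 0.5 * (F.cross_entropy(logits, labels) +
                             F.cross_entropy(logits.t(), labels))
        mim = F.mse_loss(pred_patches[mask], true_patches[mask])  # masked patches only
        return contrastive + mim_weight * mim

The contrastive term aligns global image and caption embeddings, while the reconstruction term forces patch features to stay spatially informative, which is the coherence the abstract is after.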
arXiv:2410.16144 [pdf, other] cs.CL
1-bit AI Infra: Part 1.1, Fast and Lossless BitNet b1.58 Inference on CPUs
Authors: Jinheng Wang, Hansong Zhou, Ting Song, Shaoguang Mao, Shuming Ma, Hongyu Wang, Yan Xia, Furu Wei
Abstract: Recent advances in 1-bit Large Language Models (LLMs), such as BitNet and BitNet b1.58, present a promising approach to enhancing the efficiency of LLMs in terms of speed and energy consumption. These developments also enable local LLM deployment across a broad range of devices. In this work, we introduce bitnet.cpp, a tailored software stack designed to unlock the full potential of 1-bit LLMs. Specifically, we develop a set of kernels to support fast and lossless inference of ternary BitNet b1.58 LLMs on CPUs. Extensive experiments demonstrate that bitnet.cpp achieves significant speedups, ranging from 2.37x to 6.17x on x86 CPUs and from 1.37x to 5.07x on ARM CPUs, across various model sizes. The code is available at https://github.com/microsoft/BitNet.
Submitted 23 October, 2024; v1 submitted 21 October, 2024; originally announced October 2024.
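What makes ternary inference fast is that a matmul against weights in {-1, 0, +1} reduces to additions and subtractions; a NumPy sketch of that property (bitnet.cpp's actual kernels are hand-optimized CPU code, not this):

    import numpy as np

    def ternary_matvec(W, x, scale=1.0):
        """y = scale * (W @ x) for W with entries in {-1, 0, +1},
        computed without any weight multiplications."""
        plus = np.where(W == 1, x, 0.0).sum(axis=1)    # add inputs with weight +1
        minus = np.where(W == -1, x, 0.0).sum(axis=1)  # subtract inputs with weight -1
        return scale * (plus - minus)

    W = np.array([[1, 0, -1], [0, 1, 1]])
    x = np.array([2.0, 3.0, 4.0])
    print(ternary_matvec(W, x))   # [-2.  7.], identical to W @ x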
arXiv:2410.15682 [pdf, other] cs.CV cs.RO doi: 10.1109/LRA.2024.3502056
RANSAC Back to SOTA: A Two-stage Consensus Filtering for Real-time 3D Registration
Authors: Pengcheng Shi, Shaocheng Yan, Yilin Xiao, Xinyi Liu, Yongjun Zhang, Jiayuan Li
Abstract: Correspondence-based point cloud registration (PCR) plays a key role in robotics and computer vision. However, challenges like sensor noise, object occlusions, and descriptor limitations inevitably result in numerous outliers. The RANSAC family is the most popular outlier removal solution, but its requisite iterations escalate exponentially with the outlier ratio, rendering it far inferior to existing methods (SC2PCR [1], MAC [2], etc.) in terms of accuracy or speed. Thus, we propose a two-stage consensus filtering (TCF) that elevates RANSAC to state-of-the-art (SOTA) speed and accuracy. Firstly, one-point RANSAC obtains a consensus set based on length consistency. Subsequently, two-point RANSAC refines the set via angle consistency. Then, three-point RANSAC computes a coarse pose and removes outliers based on the distances of transformed correspondences. Drawing on optimizations from one-point and two-point RANSAC, three-point RANSAC requires only a few iterations. Eventually, an iterative reweighted least squares (IRLS) step is applied to yield the optimal pose. Experiments on the large-scale KITTI and ETH datasets demonstrate that our method achieves up to a three-orders-of-magnitude speedup compared to MAC while maintaining registration accuracy and recall. Our code is available at https://github.com/ShiPC-AI/TCF.
Submitted 21 October, 2024; originally announced October 2024.
Comments: 8 pages, 8 figures
Journal ref: IEEE Robotics and Automation Letters 2024
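The first stage is easy to picture: rigid transforms preserve pairwise distances, so a single anchor correspondence already filters outliers. A sketch of that length-consistency test (the threshold and sampling loop are illustrative):

    import numpy as np

    def one_point_consensus(src, dst, anchor, tau=0.1):
        """Keep correspondences whose distance to the anchor point agrees
        between source and target clouds (src, dst: (N, 3) matched points)."""
        d_src = np.linalg.norm(src - src[anchor], axis=1)
        d_dst = np.linalg.norm(dst - dst[anchor], axis=1)
        return np.flatnonzero(np.abs(d_src - d_dst) < tau)

    def one_point_ransac(src, dst, iters=100, tau=0.1, seed=0):
        rng = np.random.default_rng(seed)
        best = np.array([], dtype=int)
        for _ in range(iters):
            inliers = one_point_consensus(src, dst, int(rng.integers(len(src))), tau)
            if len(inliers) > len(best):
                best = inliers
        return best

The surviving set then feeds the two-point (angle) and three-point (pose) stages, each needing far fewer iterations because the outlier ratio has already been slashed.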
arXiv:2410.15279 [pdf, other] cs.CV cs.AI cs.MM
ContextDet: Temporal Action Detection with Adaptive Context Aggregation
Authors: Ning Wang, Yun Xiao, Xiaopeng Peng, Xiaojun Chang, Xuanhong Wang, Dingyi Fang
Abstract: Temporal action detection (TAD), which locates and recognizes action segments, remains a challenging task in video understanding due to variable segment lengths and ambiguous boundaries. Existing methods treat neighboring contexts of an action segment indiscriminately, leading to imprecise boundary predictions. We introduce a single-stage ContextDet framework, which makes use of large-kernel convolutions in TAD for the first time. Our model features a pyramid adaptive context aggregation (ACA) architecture, capturing long context and improving action discriminability. Each ACA level consists of two novel modules. The context attention module (CAM) identifies salient contextual information, encourages context diversity, and preserves context integrity through a context gating block (CGB). The long context module (LCM) makes use of a mixture of large- and small-kernel convolutions to adaptively gather long-range context and fine-grained local features. Additionally, by varying the length of these large kernels across the ACA pyramid, our model provides lightweight yet effective context aggregation and action discrimination. We conducted extensive experiments and compared our model with a number of advanced TAD methods on six challenging TAD benchmarks: MultiThumos, Charades, FineAction, EPIC-Kitchens 100, Thumos14, and HACS, demonstrating superior accuracy at reduced inference cost.
Submitted 20 October, 2024; originally announced October 2024.
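A PyTorch sketch of the long context module's central trick, blending a large-kernel depthwise 1D convolution with a small one (channel counts and kernel sizes are illustrative, not the paper's):

    import torch
    import torch.nn as nn

    class MixedKernelConv1d(nn.Module):
        """Fuse long-range context (large kernel) with fine local detail (small kernel)."""
        def __init__(self, channels, large=31, small=3):
            super().__init__()
            self.large = nn.Conv1d(channels, channels, large, padding=large // 2, groups=channels)
            self.small = nn.Conv1d(channels, channels, small, padding=small // 2, groups=channels)
            self.fuse = nn.Conv1d(2 * channels, channels, kernel_size=1)  # learn the mixture

        def forward(self, x):            # x: (batch, channels, frames)
            return self.fuse(torch.cat([self.large(x), self.small(x)], dim=1))

Because the convolutions are depthwise, widening `large` across pyramid levels buys progressively longer temporal context at modest compute, matching the "lightweight yet effective" claim.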
arXiv:2410.15020 [pdf, other] cs.LG
Iterative Methods via Locally Evolving Set Process
Authors: Baojian Zhou, Yifan Sun, Reza Babanezhad Harikandeh, Xingzhi Guo, Deqing Yang, Yanghua Xiao
Abstract: Given the damping factor $\alpha$ and precision tolerance $\epsilon$, \citet{andersen2006local} introduced Approximate Personalized PageRank (APPR), the \textit{de facto local method} for approximating the PPR vector, with runtime bounded by $\Theta(1/(\alpha\epsilon))$ independent of the graph size. Recently, \citet{fountoulakis2022open} asked whether faster local algorithms could be developed using $\tilde{O}(1/(\sqrt{\alpha}\epsilon))$ operations. By noticing that APPR is a local variant of Gauss-Seidel, this paper explores the question of \textit{whether standard iterative solvers can be effectively localized}. We propose to use the \textit{locally evolving set process}, a novel framework to characterize the algorithm locality, and demonstrate that many standard solvers can be effectively localized. Let $\overline{\operatorname{vol}}(S_t)$ and $\overline{\gamma}_t$ be the running average of volume and the residual ratio of active nodes $S_t$ during the process. We show $\overline{\operatorname{vol}}(S_t)/\overline{\gamma}_t \leq 1/\epsilon$ and prove APPR admits a new runtime bound $\tilde{O}(\overline{\operatorname{vol}}(S_t)/(\alpha\overline{\gamma}_t))$ mirroring the actual performance. Furthermore, when the geometric mean of residual reduction is $\Theta(\sqrt{\alpha})$, then there exists $c \in (0,2)$ such that the local Chebyshev method has runtime $\tilde{O}(\overline{\operatorname{vol}}(S_t)/(\sqrt{\alpha}(2-c)))$ without the monotonicity assumption. Numerical results confirm the efficiency of this novel framework and show up to a hundredfold speedup over corresponding standard solvers on real-world graphs.
Submitted 19 October, 2024; originally announced October 2024.
Comments: 58 pages, 15 figures, NeurIPS 2024
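To see where the new bound comes from, one can instrument a localized Gauss-Seidel sweep and record the two averaged quantities it uses, the active-set volume $\overline{\operatorname{vol}}(S_t)$ and the residual ratio $\overline{\gamma}_t$ (the per-epoch bookkeeping below is an illustration, not the paper's exact accounting):

    def local_sweep_with_stats(graph, seed, alpha=0.15, eps=1e-6, max_epochs=100):
        """Epoch-style local PPR solver that tracks vol(S_t) and gamma_t."""
        p, r = {}, {seed: 1.0}
        vols, gammas = [], []
        for _ in range(max_epochs):
            active = [u for u, ru in r.items() if ru >= eps * len(graph[u])]
            if not active:
                break
            mass_before = sum(r.values())
            vols.append(sum(len(graph[u]) for u in active))     # vol(S_t)
            for u in active:                                    # in-place Gauss-Seidel updates
                ru = r.pop(u, 0.0)
                p[u] = p.get(u, 0.0) + alpha * ru
                for v in graph[u]:
                    r[v] = r.get(v, 0.0) + (1 - alpha) * ru / len(graph[u])
            gammas.append(sum(r.values()) / mass_before)        # residual ratio gamma_t
        avg = lambda xs: sum(xs) / len(xs) if xs else 0.0
        return p, avg(vols), avg(gammas)

The runtime of each epoch is proportional to the recorded volume, and the residual ratio measures how fast mass drains, which is exactly the trade-off the paper's $\tilde{O}(\overline{\operatorname{vol}}(S_t)/(\alpha\overline{\gamma}_t))$ bound captures.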
arXiv:2410.13987 [pdf, other] cs.CL
RiTeK: A Dataset for Large Language Models Complex Reasoning over Textual Knowledge Graphs
Authors: Jiatan Huang, Mingchen Li, Zonghai Yao, Zhichao Yang, Yongkang Xiao, Feiyun Ouyang, Xiaohan Li, Shuo Han, Hong Yu
Abstract: Answering complex real-world questions often requires accurate retrieval from textual knowledge graphs (TKGs). The scarcity of annotated data, along with intricate topological structures, makes this task particularly challenging. As relational path information can enhance the inference ability of Large Language Models (LLMs), efficiently retrieving more complex relational path information from TKGs presents another key challenge. To tackle these challenges, we first develop a Dataset for LLMs Complex Reasoning over Textual Knowledge Graphs (RiTeK) with broad topological structure coverage. We synthesize realistic user queries that integrate diverse topological structures, relational information, and complex textual descriptions, and conduct rigorous expert evaluation to validate the quality of the synthesized queries. We then introduce an enhanced Monte Carlo Tree Search (MCTS) method, Relational MCTS, to automatically extract relational path information from textual graphs for specific queries. Our dataset mainly covers the medical domain, as its relation types and entities are complex and publicly available. Experimental results indicate that RiTeK poses significant challenges for current retrieval and LLM systems, while the proposed Relational MCTS method enhances LLM inference ability and achieves state-of-the-art performance on RiTeK.
Submitted 17 October, 2024; originally announced October 2024.
arXiv:2410.13765 [pdf, other] cs.CL cs.IR
Knowledge-Aware Query Expansion with Large Language Models for Textual and Relational Retrieval
Authors: Yu Xia, Junda Wu, Sungchul Kim, Tong Yu, Ryan A. Rossi, Haoliang Wang, Julian McAuley
Abstract: Large language models (LLMs) have been used to generate query expansions augmenting original queries for improving information search. Recent studies also explore providing LLMs with initial retrieval results to generate query expansions better grounded in the document corpus. However, these methods mostly focus on enhancing textual similarities between search queries and target documents, overlooking document relations. For queries like "Find me a highly rated camera for wildlife photography compatible with my Nikon F-Mount lenses", existing methods may generate expansions that are semantically similar but structurally unrelated to user intents. To handle such semi-structured queries with both textual and relational requirements, in this paper we propose a knowledge-aware query expansion framework, augmenting LLMs with structured document relations from a knowledge graph (KG). To further address the limitation of entity-based scoring in existing KG-based methods, we leverage document texts as rich KG node representations and use document-based relation filtering for our Knowledge-Aware Retrieval (KAR). Extensive experiments on three datasets of diverse domains show the advantages of our method over state-of-the-art baselines on textual and relational semi-structured retrieval.
Submitted 17 October, 2024; originally announced October 2024.
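A sketch of the overall flow under stated assumptions: `link_entities`, `kg_neighbors`, and `embed` are hypothetical helpers standing in for entity linking, KG lookup (returning a relation plus the neighbor's document text), and a text encoder; none of these names come from the paper.

    import numpy as np

    def knowledge_aware_expansion(query, link_entities, kg_neighbors, embed, top_k=5):
        """Expand a query with KG-derived document texts, filtering candidate
        relations by similarity between their node documents and the query."""
        q = embed(query)
        scored = []
        for entity in link_entities(query):                  # hypothetical entity linking
            for relation, neighbor_doc in kg_neighbors(entity):
                sim = float(np.dot(q, embed(neighbor_doc)))  # document-based relation filtering
                scored.append((sim, f"{relation}: {neighbor_doc}"))
        scored.sort(key=lambda t: -t[0])
        return query + " " + " ".join(text for _, text in scored[:top_k])

Scoring candidate relations by their node's document text, rather than by entity names alone, is what lets the expansion respect relational constraints like lens-mount compatibility.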
arXiv:2410.13605 [pdf, other] cs.LG
Transformer-Based Approaches for Sensor-Based Human Activity Recognition: Opportunities and Challenges
Authors: Clayton Souza Leite, Henry Mauranen, Aziza Zhanabatyrova, Yu Xiao
Abstract: Transformers have excelled in natural language processing and computer vision, paving their way to sensor-based Human Activity Recognition (HAR). Previous studies show that transformers outperform their counterparts exclusively when they harness abundant data or employ compute-intensive optimization algorithms. However, neither of these scenarios is viable in sensor-based HAR due to the scarcity of data in this field and the frequent need to perform training and inference on resource-constrained devices. Our extensive investigation into various implementations of transformer-based versus non-transformer-based HAR using wearable sensors, encompassing more than 500 experiments, corroborates these concerns. We observe that transformer-based solutions pose higher computational demands, consistently yield inferior performance, and experience significant performance degradation when quantized to accommodate resource-constrained devices. Additionally, transformers demonstrate lower robustness to adversarial attacks, posing a potential threat to user trust in HAR.
Submitted 17 October, 2024; originally announced October 2024.