CINXE.COM
Search | arXiv e-print repository
<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 541 results for author: <span class="mathjax">Luo, H</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Luo%2C+H">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Luo, H"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Luo%2C+H&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Luo, H"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Luo%2C+H&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Luo%2C+H&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Luo%2C+H&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Luo%2C+H&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Luo%2C+H&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Luo%2C+H&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">…</span></li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2503.21322">arXiv:2503.21322</a> <span> [<a href="https://arxiv.org/pdf/2503.21322">pdf</a>, <a href="https://arxiv.org/format/2503.21322">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> HyperGraphRAG: Retrieval-Augmented Generation with Hypergraph-Structured Knowledge Representation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Luo%2C+H">Haoran Luo</a>, <a href="/search/cs?searchtype=author&query=E%2C+H">Haihong E</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+G">Guanting Chen</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Y">Yandan Zheng</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+X">Xiaobao Wu</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+Y">Yikai Guo</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+Q">Qika Lin</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+Y">Yu Feng</a>, <a href="/search/cs?searchtype=author&query=Kuang%2C+Z">Zemin Kuang</a>, <a href="/search/cs?searchtype=author&query=Song%2C+M">Meina Song</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Y">Yifan Zhu</a>, <a href="/search/cs?searchtype=author&query=Tuan%2C+L+A">Luu Anh Tuan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2503.21322v1-abstract-short" style="display: inline;"> While standard Retrieval-Augmented Generation (RAG) based on chunks, GraphRAG structures knowledge as graphs to leverage the relations among entities. However, previous GraphRAG methods are limited by binary relations: one edge in the graph only connects two entities, which cannot well model the n-ary relations among more than two entities that widely exist in reality. To address this limitation,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.21322v1-abstract-full').style.display = 'inline'; document.getElementById('2503.21322v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2503.21322v1-abstract-full" style="display: none;"> While standard Retrieval-Augmented Generation (RAG) based on chunks, GraphRAG structures knowledge as graphs to leverage the relations among entities. However, previous GraphRAG methods are limited by binary relations: one edge in the graph only connects two entities, which cannot well model the n-ary relations among more than two entities that widely exist in reality. To address this limitation, we propose HyperGraphRAG, a novel hypergraph-based RAG method that represents n-ary relational facts via hyperedges, modeling the complicated n-ary relations in the real world. To retrieve and generate over hypergraphs, we introduce a complete pipeline with a hypergraph construction method, a hypergraph retrieval strategy, and a hypergraph-guided generation mechanism. Experiments across medicine, agriculture, computer science, and law demonstrate that HyperGraphRAG outperforms standard RAG and GraphRAG in accuracy and generation quality. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.21322v1-abstract-full').style.display = 'none'; document.getElementById('2503.21322v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Preprint</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2503.20840">arXiv:2503.20840</a> <span> [<a href="https://arxiv.org/pdf/2503.20840">pdf</a>, <a href="https://arxiv.org/format/2503.20840">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> CodeTool: Enhancing Programmatic Tool Invocation of LLMs via Process Supervision </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lu%2C+Y">Yifei Lu</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+F">Fanghua Ye</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Jian Li</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+Q">Qiang Gao</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+C">Cheng Liu</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Haibo Luo</a>, <a href="/search/cs?searchtype=author&query=Du%2C+N">Nan Du</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xiaolong Li</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+F">Feiliang Ren</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2503.20840v1-abstract-short" style="display: inline;"> Tool invocation significantly enhances the capabilities of Large Language Models (LLMs), yet challenges persist, particularly in complex task scenarios. Current methods, such as instruction-enhanced reasoning and supervised fine-tuning, often result in unnecessarily long reasoning paths and face difficulties in verifying the correctness of intermediate steps. In this paper, we propose CodeTool, a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.20840v1-abstract-full').style.display = 'inline'; document.getElementById('2503.20840v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2503.20840v1-abstract-full" style="display: none;"> Tool invocation significantly enhances the capabilities of Large Language Models (LLMs), yet challenges persist, particularly in complex task scenarios. Current methods, such as instruction-enhanced reasoning and supervised fine-tuning, often result in unnecessarily long reasoning paths and face difficulties in verifying the correctness of intermediate steps. In this paper, we propose CodeTool, a novel framework for stepwise code generation that improves LLM tool invocation by leveraging the concise and easily verifiable nature of code. CodeTool incorporates two distinct process rewards: the On-the-spot Reward, which provides immediate feedback on the accuracy of each tool invocation, and the Latent Reward, which assesses the contribution of each step toward overall task completion. By maximizing the cumulative reward of the On-the-spot and Latend Rewards at each step, LLMs are guided to follow efficient and accurate reasoning paths. Extensive experiments on StableToolBench and RestBench-TMDB demonstrate the superiority of CodeTool over existing approaches. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.20840v1-abstract-full').style.display = 'none'; document.getElementById('2503.20840v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2503.18042">arXiv:2503.18042</a> <span> [<a href="https://arxiv.org/pdf/2503.18042">pdf</a>, <a href="https://arxiv.org/format/2503.18042">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> DualCP: Rehearsal-Free Domain-Incremental Learning via Dual-Level Concept Prototype </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+Q">Qiang Wang</a>, <a href="/search/cs?searchtype=author&query=He%2C+Y">Yuhang He</a>, <a href="/search/cs?searchtype=author&query=Dong%2C+S">SongLin Dong</a>, <a href="/search/cs?searchtype=author&query=Song%2C+X">Xiang Song</a>, <a href="/search/cs?searchtype=author&query=Han%2C+J">Jizhou Han</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Haoyu Luo</a>, <a href="/search/cs?searchtype=author&query=Gong%2C+Y">Yihong Gong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2503.18042v1-abstract-short" style="display: inline;"> Domain-Incremental Learning (DIL) enables vision models to adapt to changing conditions in real-world environments while maintaining the knowledge acquired from previous domains. Given privacy concerns and training time, Rehearsal-Free DIL (RFDIL) is more practical. Inspired by the incremental cognitive process of the human brain, we design Dual-level Concept Prototypes (DualCP) for each class to… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.18042v1-abstract-full').style.display = 'inline'; document.getElementById('2503.18042v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2503.18042v1-abstract-full" style="display: none;"> Domain-Incremental Learning (DIL) enables vision models to adapt to changing conditions in real-world environments while maintaining the knowledge acquired from previous domains. Given privacy concerns and training time, Rehearsal-Free DIL (RFDIL) is more practical. Inspired by the incremental cognitive process of the human brain, we design Dual-level Concept Prototypes (DualCP) for each class to address the conflict between learning new knowledge and retaining old knowledge in RFDIL. To construct DualCP, we propose a Concept Prototype Generator (CPG) that generates both coarse-grained and fine-grained prototypes for each class. Additionally, we introduce a Coarse-to-Fine calibrator (C2F) to align image features with DualCP. Finally, we propose a Dual Dot-Regression (DDR) loss function to optimize our C2F module. Extensive experiments on the DomainNet, CDDB, and CORe50 datasets demonstrate the effectiveness of our method. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.18042v1-abstract-full').style.display = 'none'; document.getElementById('2503.18042v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at AAAI 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2503.16749">arXiv:2503.16749</a> <span> [<a href="https://arxiv.org/pdf/2503.16749">pdf</a>, <a href="https://arxiv.org/format/2503.16749">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> Revisiting DRAM Read Disturbance: Identifying Inconsistencies Between Experimental Characterization and Device-Level Studies </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Luo%2C+H">Haocong Luo</a>, <a href="/search/cs?searchtype=author&query=Y%C3%BCksel%2C+%C4%B0+E">陌smail Emir Y眉ksel</a>, <a href="/search/cs?searchtype=author&query=Olgun%2C+A">Ataberk Olgun</a>, <a href="/search/cs?searchtype=author&query=Ya%C4%9Fl%C4%B1k%C3%A7%C4%B1%2C+A+G">A. Giray Ya臒l谋k莽谋</a>, <a href="/search/cs?searchtype=author&query=Mutlu%2C+O">Onur Mutlu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2503.16749v1-abstract-short" style="display: inline;"> Modern DRAM is vulnerable to read disturbance (e.g., RowHammer and RowPress) that significantly undermines the robust operation of the system. Repeatedly opening and closing a DRAM row (RowHammer) or keeping a DRAM row open for a long period of time (RowPress) induces bitflips in nearby unaccessed DRAM rows. Prior works on DRAM read disturbance either 1) perform experimental characterization using… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.16749v1-abstract-full').style.display = 'inline'; document.getElementById('2503.16749v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2503.16749v1-abstract-full" style="display: none;"> Modern DRAM is vulnerable to read disturbance (e.g., RowHammer and RowPress) that significantly undermines the robust operation of the system. Repeatedly opening and closing a DRAM row (RowHammer) or keeping a DRAM row open for a long period of time (RowPress) induces bitflips in nearby unaccessed DRAM rows. Prior works on DRAM read disturbance either 1) perform experimental characterization using commercial-off-the-shelf (COTS) DRAM chips to demonstrate the high-level characteristics of the read disturbance bitflips, or 2) perform device-level simulations to understand the low-level error mechanisms of the read disturbance bitflips. In this paper, we attempt to align and cross-validate the real-chip experimental characterization results and state-of-the-art device-level studies of DRAM read disturbance. To do so, we first identify and extract the key bitflip characteristics of RowHammer and RowPress from the device-level error mechanisms studied in prior works. Then, we perform experimental characterization on 96 COTS DDR4 DRAM chips that directly match the data and access patterns studied in the device-level works. Through our experiments, we identify fundamental inconsistencies in the RowHammer and RowPress bitflip directions and access pattern dependence between experimental characterization results and the device-level error mechanisms. Based on our results, we hypothesize that either 1) the retention failure based DRAM architecture reverse-engineering methodologies do not fully work on modern DDR4 DRAM chips, or 2) existing device-level works do not fully uncover all the major read disturbance error mechanisms. We hope our findings inspire and enable future works to build a more fundamental and comprehensive understanding of DRAM read disturbance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.16749v1-abstract-full').style.display = 'none'; document.getElementById('2503.16749v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2503.14473">arXiv:2503.14473</a> <span> [<a href="https://arxiv.org/pdf/2503.14473">pdf</a>, <a href="https://arxiv.org/format/2503.14473">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Emerging Technologies">cs.ET</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> EnQode: Fast Amplitude Embedding for Quantum Machine Learning Using Classical Data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Han%2C+J">Jason Han</a>, <a href="/search/cs?searchtype=author&query=DiBrita%2C+N+S">Nicholas S. DiBrita</a>, <a href="/search/cs?searchtype=author&query=Cho%2C+Y">Younghyun Cho</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Hengrui Luo</a>, <a href="/search/cs?searchtype=author&query=Patel%2C+T">Tirthak Patel</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2503.14473v1-abstract-short" style="display: inline;"> Amplitude embedding (AE) is essential in quantum machine learning (QML) for encoding classical data onto quantum circuits. However, conventional AE methods suffer from deep, variable-length circuits that introduce high output error due to extensive gate usage and variable error rates across samples, resulting in noise-driven inconsistencies that degrade model accuracy. We introduce EnQode, a fast… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.14473v1-abstract-full').style.display = 'inline'; document.getElementById('2503.14473v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2503.14473v1-abstract-full" style="display: none;"> Amplitude embedding (AE) is essential in quantum machine learning (QML) for encoding classical data onto quantum circuits. However, conventional AE methods suffer from deep, variable-length circuits that introduce high output error due to extensive gate usage and variable error rates across samples, resulting in noise-driven inconsistencies that degrade model accuracy. We introduce EnQode, a fast AE technique based on symbolic representation that addresses these limitations by clustering dataset samples and solving for cluster mean states through a low-depth, machine-specific ansatz. Optimized to reduce physical gates and SWAP operations, EnQode ensures all samples face consistent, low noise levels by standardizing circuit depth and composition. With over 90% fidelity in data mapping, EnQode enables robust, high-performance QML on noisy intermediate-scale quantum (NISQ) devices. Our open-source solution provides a scalable and efficient alternative for integrating classical data with quantum models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.14473v1-abstract-full').style.display = 'none'; document.getElementById('2503.14473v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">EnQode will appear in the Proceedings of the Design Automation Conference (DAC), 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2503.10212">arXiv:2503.10212</a> <span> [<a href="https://arxiv.org/pdf/2503.10212">pdf</a>, <a href="https://arxiv.org/format/2503.10212">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> MouseGPT: A Large-scale Vision-Language Model for Mouse Behavior Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xu%2C+T">Teng Xu</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+T">Taotao Zhou</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Youjia Wang</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+P">Peng Yang</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+S">Simin Tang</a>, <a href="/search/cs?searchtype=author&query=Shao%2C+K">Kuixiang Shao</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+Z">Zifeng Tang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yifei Liu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xinyuan Chen</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Hongshuang Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xiaohui Wang</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Huoqing Luo</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jingya Wang</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+J">Ji Hu</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+J">Jingyi Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2503.10212v2-abstract-short" style="display: inline;"> Analyzing animal behavior is crucial in advancing neuroscience, yet quantifying and deciphering its intricate dynamics remains a significant challenge. Traditional machine vision approaches, despite their ability to detect spontaneous behaviors, fall short due to limited interpretability and reliance on manual labeling, which restricts the exploration of the full behavioral spectrum. Here, we intr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.10212v2-abstract-full').style.display = 'inline'; document.getElementById('2503.10212v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2503.10212v2-abstract-full" style="display: none;"> Analyzing animal behavior is crucial in advancing neuroscience, yet quantifying and deciphering its intricate dynamics remains a significant challenge. Traditional machine vision approaches, despite their ability to detect spontaneous behaviors, fall short due to limited interpretability and reliance on manual labeling, which restricts the exploration of the full behavioral spectrum. Here, we introduce MouseGPT, a Vision-Language Model (VLM) that integrates visual cues with natural language to revolutionize mouse behavior analysis. Built upon our first-of-its-kind dataset - incorporating pose dynamics and open-vocabulary behavioral annotations across over 42 million frames of diverse psychiatric conditions - MouseGPT provides a novel, context-rich method for comprehensive behavior interpretation. Our holistic analysis framework enables detailed behavior profiling, clustering, and novel behavior discovery, offering deep insights without the need for labor - intensive manual annotation. Evaluations reveal that MouseGPT surpasses existing models in precision, adaptability, and descriptive richness, positioning it as a transformative tool for ethology and for unraveling complex behavioral dynamics in animal models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.10212v2-abstract-full').style.display = 'none'; document.getElementById('2503.10212v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">53 pages, 5 figures, 7 extended figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2503.09663">arXiv:2503.09663</a> <span> [<a href="https://arxiv.org/pdf/2503.09663">pdf</a>, <a href="https://arxiv.org/format/2503.09663">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Operating Systems">cs.OS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> BYOS: Knowledge-driven Large Language Models Bring Your Own Operating System More Excellent </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lin%2C+H">Hongyu Lin</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yuchen Li</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Haoran Luo</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+K">Kaichun Yao</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Libo Zhang</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+M">Mingjie Xing</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yanjun Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2503.09663v1-abstract-short" style="display: inline;"> Kernel configurations play an important role in the performance of Operating System (OS). However, with the rapid iteration of OS, finding the proper configurations that meet specific requirements can be challenging, which can be primarily attributed to the default kernel provided by vendors does not take the requirements of specific workloads into account, and the heavyweight tuning process canno… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.09663v1-abstract-full').style.display = 'inline'; document.getElementById('2503.09663v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2503.09663v1-abstract-full" style="display: none;"> Kernel configurations play an important role in the performance of Operating System (OS). However, with the rapid iteration of OS, finding the proper configurations that meet specific requirements can be challenging, which can be primarily attributed to the default kernel provided by vendors does not take the requirements of specific workloads into account, and the heavyweight tuning process cannot catch up with the rapid evolving pace of the kernel. To address these challenges, we propose BYOS, a novel framework powered by Large Language Models (LLMs) to customize kernel configurations for diverse user requirements. By integrating OS-oriented Dual-layer Knowledge Graph (OD-KG) and corresponding reasoning strategy, BYOS enhanced the LLM's understanding of the characteristics and capabilities of OS, thus enabling customized, cost-effective, and convenient generation of kernel configurations. Experiments show that the kernels configured by BYOS outperform the default vendor-configured kernels by 7.1% to 155.4%, demonstrating the effectiveness and efficiency of BYOS in customizing kernel configurations. Our code is available at https://github.com/LHY-24/BYOS. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.09663v1-abstract-full').style.display = 'none'; document.getElementById('2503.09663v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2503.08317">arXiv:2503.08317</a> <span> [<a href="https://arxiv.org/pdf/2503.08317">pdf</a>, <a href="https://arxiv.org/format/2503.08317">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> Uni-Gaussians: Unifying Camera and Lidar Simulation with Gaussians for Dynamic Driving Scenarios </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yuan%2C+Z">Zikang Yuan</a>, <a href="/search/cs?searchtype=author&query=Pu%2C+Y">Yuechuan Pu</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Hongcheng Luo</a>, <a href="/search/cs?searchtype=author&query=Lang%2C+F">Fengtian Lang</a>, <a href="/search/cs?searchtype=author&query=Chi%2C+C">Cheng Chi</a>, <a href="/search/cs?searchtype=author&query=Li%2C+T">Teng Li</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+Y">Yingying Shen</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+H">Haiyang Sun</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+B">Bing Wang</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+X">Xin Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2503.08317v3-abstract-short" style="display: inline;"> Ensuring the safety of autonomous vehicles necessitates comprehensive simulation of multi-sensor data, encompassing inputs from both cameras and LiDAR sensors, across various dynamic driving scenarios. Neural rendering techniques, which utilize collected raw sensor data to simulate these dynamic environments, have emerged as a leading methodology. While NeRF-based approaches can uniformly represen… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.08317v3-abstract-full').style.display = 'inline'; document.getElementById('2503.08317v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2503.08317v3-abstract-full" style="display: none;"> Ensuring the safety of autonomous vehicles necessitates comprehensive simulation of multi-sensor data, encompassing inputs from both cameras and LiDAR sensors, across various dynamic driving scenarios. Neural rendering techniques, which utilize collected raw sensor data to simulate these dynamic environments, have emerged as a leading methodology. While NeRF-based approaches can uniformly represent scenes for rendering data from both camera and LiDAR, they are hindered by slow rendering speeds due to dense sampling. Conversely, Gaussian Splatting-based methods employ Gaussian primitives for scene representation and achieve rapid rendering through rasterization. However, these rasterization-based techniques struggle to accurately model non-linear optical sensors. This limitation restricts their applicability to sensors beyond pinhole cameras. To address these challenges and enable unified representation of dynamic driving scenarios using Gaussian primitives, this study proposes a novel hybrid approach. Our method utilizes rasterization for rendering image data while employing Gaussian ray-tracing for LiDAR data rendering. Experimental results on public datasets demonstrate that our approach outperforms current state-of-the-art methods. This work presents a unified and efficient solution for realistic simulation of camera and LiDAR data in autonomous driving scenarios using Gaussian primitives, offering significant advancements in both rendering quality and computational efficiency. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.08317v3-abstract-full').style.display = 'none'; document.getElementById('2503.08317v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2503.07065">arXiv:2503.07065</a> <span> [<a href="https://arxiv.org/pdf/2503.07065">pdf</a>, <a href="https://arxiv.org/format/2503.07065">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Boosting the Generalization and Reasoning of Vision Language Models with Curriculum Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Deng%2C+H">Huilin Deng</a>, <a href="/search/cs?searchtype=author&query=Zou%2C+D">Ding Zou</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+R">Rui Ma</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Hongchen Luo</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+Y">Yang Cao</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Yu Kang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2503.07065v1-abstract-short" style="display: inline;"> While state-of-the-art vision-language models (VLMs) have demonstrated remarkable capabilities in complex visual-text tasks, their success heavily relies on massive model scaling, limiting their practical deployment. Small-scale VLMs offer a more practical alternative but face significant challenges when trained with traditional supervised fine-tuning (SFT), particularly in two aspects: out-of-dom… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.07065v1-abstract-full').style.display = 'inline'; document.getElementById('2503.07065v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2503.07065v1-abstract-full" style="display: none;"> While state-of-the-art vision-language models (VLMs) have demonstrated remarkable capabilities in complex visual-text tasks, their success heavily relies on massive model scaling, limiting their practical deployment. Small-scale VLMs offer a more practical alternative but face significant challenges when trained with traditional supervised fine-tuning (SFT), particularly in two aspects: out-of-domain (OOD) generalization and reasoning abilities, which significantly lags behind the contemporary Large language models (LLMs). To address these challenges, we propose Curriculum Reinforcement Finetuning (Curr-ReFT), a novel post-training paradigm specifically designed for small-scale VLMs. Inspired by the success of reinforcement learning in LLMs, Curr-ReFT comprises two sequential stages: (1) Curriculum Reinforcement Learning, which ensures steady progression of model capabilities through difficulty-aware reward design, transitioning from basic visual perception to complex reasoning tasks; and (2) Rejected Sampling-based Self-improvement, which maintains the fundamental capabilities of VLMs through selective learning from high-quality multimodal and language examples. Extensive experiments demonstrate that models trained with Curr-ReFT paradigm achieve state-of-the-art performance across various visual tasks in both in-domain and out-of-domain settings. Moreover, our Curr-ReFT enhanced 3B model matches the performance of 32B-parameter models, demonstrating that efficient training paradigms can effectively bridge the gap between small and large models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.07065v1-abstract-full').style.display = 'none'; document.getElementById('2503.07065v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2503.06756">arXiv:2503.06756</a> <span> [<a href="https://arxiv.org/pdf/2503.06756">pdf</a>, <a href="https://arxiv.org/format/2503.06756">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> Sphere Precoding for Robust Near-Field Communications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Luo%2C+H">Hao Luo</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yu Zhang</a>, <a href="/search/cs?searchtype=author&query=Alkhateeb%2C+A">Ahmed Alkhateeb</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2503.06756v1-abstract-short" style="display: inline;"> Near-field communication with large antenna arrays promises significant beamforming and multiplexing gains. These communication links, however, are very sensitive to user mobility as any small change in the user position may suddenly drop the signal power. This leads to critical challenges for the robustness of these near-field communication systems. In this paper, we propose \textit{sphere precod… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.06756v1-abstract-full').style.display = 'inline'; document.getElementById('2503.06756v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2503.06756v1-abstract-full" style="display: none;"> Near-field communication with large antenna arrays promises significant beamforming and multiplexing gains. These communication links, however, are very sensitive to user mobility as any small change in the user position may suddenly drop the signal power. This leads to critical challenges for the robustness of these near-field communication systems. In this paper, we propose \textit{sphere precoding}, which is a robust precoding design to address user mobility in near-field communications. To gain insights into the spatial correlation of near-field channels, we extend the one-ring channel model to what we call one-sphere channel model and derive the channel covariance considering user mobility. Based on the one-sphere channel model, a robust precoding design problem is defined to optimize the minimum signal-to-interference-plus-noise ratio (SINR) satisfaction probability among mobile users. By utilizing the eigen structure of channel covariance, we further design a relaxed convex problem to approximate the solution of the original non-convex problem. The low-complexity solution effectively shapes a sphere that maintains the signal power for the target user and also nulls its interference within spheres around the other users. Simulation results highlight the efficacy of the proposed solution in achieving robust precoding yet high achievable rates in near-field communication systems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.06756v1-abstract-full').style.display = 'none'; document.getElementById('2503.06756v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">The code for sphere precoding will be available on the Wireless Intelligence Lab website: https://www.wi-lab.net/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2503.05305">arXiv:2503.05305</a> <span> [<a href="https://arxiv.org/pdf/2503.05305">pdf</a>, <a href="https://arxiv.org/format/2503.05305">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Frequency Autoregressive Image Generation with Continuous Tokens </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yu%2C+H">Hu Yu</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Hao Luo</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+H">Hangjie Yuan</a>, <a href="/search/cs?searchtype=author&query=Rong%2C+Y">Yu Rong</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+F">Feng Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2503.05305v1-abstract-short" style="display: inline;"> Autoregressive (AR) models for image generation typically adopt a two-stage paradigm of vector quantization and raster-scan ``next-token prediction", inspired by its great success in language modeling. However, due to the huge modality gap, image autoregressive models may require a systematic reevaluation from two perspectives: tokenizer format and regression direction. In this paper, we introduce… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.05305v1-abstract-full').style.display = 'inline'; document.getElementById('2503.05305v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2503.05305v1-abstract-full" style="display: none;"> Autoregressive (AR) models for image generation typically adopt a two-stage paradigm of vector quantization and raster-scan ``next-token prediction", inspired by its great success in language modeling. However, due to the huge modality gap, image autoregressive models may require a systematic reevaluation from two perspectives: tokenizer format and regression direction. In this paper, we introduce the frequency progressive autoregressive (\textbf{FAR}) paradigm and instantiate FAR with the continuous tokenizer. Specifically, we identify spectral dependency as the desirable regression direction for FAR, wherein higher-frequency components build upon the lower one to progressively construct a complete image. This design seamlessly fits the causality requirement for autoregressive models and preserves the unique spatial locality of image data. Besides, we delve into the integration of FAR and the continuous tokenizer, introducing a series of techniques to address optimization challenges and improve the efficiency of training and inference processes. We demonstrate the efficacy of FAR through comprehensive experiments on the ImageNet dataset and verify its potential on text-to-image generation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.05305v1-abstract-full').style.display = 'none'; document.getElementById('2503.05305v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2503.05024">arXiv:2503.05024</a> <span> [<a href="https://arxiv.org/pdf/2503.05024">pdf</a>, <a href="https://arxiv.org/format/2503.05024">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Methodology">stat.ME</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Statistics Theory">math.ST</span> </div> </div> <p class="title is-5 mathjax"> Kernel-based estimators for functional causal effects </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Raykov%2C+Y+P">Yordan P. Raykov</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Hengrui Luo</a>, <a href="/search/cs?searchtype=author&query=Strait%2C+J+D">Justin D. Strait</a>, <a href="/search/cs?searchtype=author&query=KhudaBukhsh%2C+W+R">Wasiur R. KhudaBukhsh</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2503.05024v2-abstract-short" style="display: inline;"> We propose causal effect estimators based on empirical Fr茅chet means and operator-valued kernels, tailored to functional data spaces. These methods address the challenges of high-dimensionality, sequential ordering, and model complexity while preserving robustness to treatment misspecification. Using structural assumptions, we obtain compact representations of potential outcomes, enabling scalable… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.05024v2-abstract-full').style.display = 'inline'; document.getElementById('2503.05024v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2503.05024v2-abstract-full" style="display: none;"> We propose causal effect estimators based on empirical Fr茅chet means and operator-valued kernels, tailored to functional data spaces. These methods address the challenges of high-dimensionality, sequential ordering, and model complexity while preserving robustness to treatment misspecification. Using structural assumptions, we obtain compact representations of potential outcomes, enabling scalable estimation of causal effects over time and across covariates. We provide both theoretical, regarding the consistency of functional causal effects, as well as empirical comparison of a range of proposed causal effect estimators. Applications to binary treatment settings with functional outcomes illustrate the framework's utility in biomedical monitoring, where outcomes exhibit complex temporal dynamics. Our estimators accommodate scenarios with registered covariates and outcomes, aligning them to the Fr茅chet means, as well as cases requiring higher-order representations to capture intricate covariate-outcome interactions. These advancements extend causal inference to dynamic and non-linear domains, offering new tools for understanding complex treatment effects in functional data settings. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.05024v2-abstract-full').style.display = 'none'; document.getElementById('2503.05024v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 62G05 <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> G.3 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2503.04183">arXiv:2503.04183</a> <span> [<a href="https://arxiv.org/pdf/2503.04183">pdf</a>, <a href="https://arxiv.org/format/2503.04183">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> CrowdHMTware: A Cross-level Co-adaptation Middleware for Context-aware Mobile DL Deployment </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+S">Sicong Liu</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+B">Bin Guo</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+S">Shiyan Luo</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yuzhan Wang</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Hao Luo</a>, <a href="/search/cs?searchtype=author&query=Fang%2C+C">Cheng Fang</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Y">Yuan Xu</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+K">Ke Ma</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yao Li</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+Z">Zhiwen Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2503.04183v1-abstract-short" style="display: inline;"> There are many deep learning (DL) powered mobile and wearable applications today continuously and unobtrusively sensing the ambient surroundings to enhance all aspects of human lives.To enable robust and private mobile sensing, DL models are often deployed locally on resource-constrained mobile devices using techniques such as model compression or offloading.However, existing methods, either front… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.04183v1-abstract-full').style.display = 'inline'; document.getElementById('2503.04183v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2503.04183v1-abstract-full" style="display: none;"> There are many deep learning (DL) powered mobile and wearable applications today continuously and unobtrusively sensing the ambient surroundings to enhance all aspects of human lives.To enable robust and private mobile sensing, DL models are often deployed locally on resource-constrained mobile devices using techniques such as model compression or offloading.However, existing methods, either front-end algorithm level (i.e. DL model compression/partitioning) or back-end scheduling level (i.e. operator/resource scheduling), cannot be locally online because they require offline retraining to ensure accuracy or rely on manually pre-defined strategies, struggle with dynamic adaptability.The primary challenge lies in feeding back runtime performance from the back-end level to the front-end level optimization decision. Moreover, the adaptive mobile DL model porting middleware with cross-level co-adaptation is less explored, particularly in mobile environments with diversity and dynamics. In response, we introduce CrowdHMTware, a dynamic context-adaptive DL model deployment middleware for heterogeneous mobile devices. It establishes an automated adaptation loop between cross-level functional components, i.e. elastic inference, scalable offloading, and model-adaptive engine, enhancing scalability and adaptability. Experiments with four typical tasks across 15 platforms and a real-world case study demonstrate that CrowdHMTware can effectively scale DL model, offloading, and engine actions across diverse platforms and tasks. It hides run-time system issues from developers, reducing the required developer expertise. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.04183v1-abstract-full').style.display = 'none'; document.getElementById('2503.04183v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper is accepted by IEEE Transactions on Mobile Computing</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2503.03256">arXiv:2503.03256</a> <span> [<a href="https://arxiv.org/pdf/2503.03256">pdf</a>, <a href="https://arxiv.org/format/2503.03256">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> BAT: Learning Event-based Optical Flow with Bidirectional Adaptive Temporal Correlation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xu%2C+G">Gangwei Xu</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+H">Haotong Lin</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zhaoxing Zhang</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Hongcheng Luo</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+H">Haiyang Sun</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+X">Xin Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2503.03256v1-abstract-short" style="display: inline;"> Event cameras deliver visual information characterized by a high dynamic range and high temporal resolution, offering significant advantages in estimating optical flow for complex lighting conditions and fast-moving objects. Current advanced optical flow methods for event cameras largely adopt established image-based frameworks. However, the spatial sparsity of event data limits their performance.… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.03256v1-abstract-full').style.display = 'inline'; document.getElementById('2503.03256v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2503.03256v1-abstract-full" style="display: none;"> Event cameras deliver visual information characterized by a high dynamic range and high temporal resolution, offering significant advantages in estimating optical flow for complex lighting conditions and fast-moving objects. Current advanced optical flow methods for event cameras largely adopt established image-based frameworks. However, the spatial sparsity of event data limits their performance. In this paper, we present BAT, an innovative framework that estimates event-based optical flow using bidirectional adaptive temporal correlation. BAT includes three novel designs: 1) a bidirectional temporal correlation that transforms bidirectional temporally dense motion cues into spatially dense ones, enabling accurate and spatially dense optical flow estimation; 2) an adaptive temporal sampling strategy for maintaining temporal consistency in correlation; 3) spatially adaptive temporal motion aggregation to efficiently and adaptively aggregate consistent target motion features into adjacent motion features while suppressing inconsistent ones. Our results rank $1^{st}$ on the DSEC-Flow benchmark, outperforming existing state-of-the-art methods by a large margin while also exhibiting sharp edges and high-quality details. Notably, our BAT can accurately predict future optical flow using only past events, significantly outperforming E-RAFT's warm-start approach. Code: \textcolor{magenta}{https://github.com/gangweiX/BAT}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.03256v1-abstract-full').style.display = 'none'; document.getElementById('2503.03256v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2503.02825">arXiv:2503.02825</a> <span> [<a href="https://arxiv.org/pdf/2503.02825">pdf</a>, <a href="https://arxiv.org/format/2503.02825">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> </div> </div> <p class="title is-5 mathjax"> On Separation Between Best-Iterate, Random-Iterate, and Last-Iterate Convergence of Learning in Games </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cai%2C+Y">Yang Cai</a>, <a href="/search/cs?searchtype=author&query=Farina%2C+G">Gabriele Farina</a>, <a href="/search/cs?searchtype=author&query=Grand-Cl%C3%A9ment%2C+J">Julien Grand-Cl茅ment</a>, <a href="/search/cs?searchtype=author&query=Kroer%2C+C">Christian Kroer</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+C">Chung-Wei Lee</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Haipeng Luo</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+W">Weiqiang Zheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2503.02825v1-abstract-short" style="display: inline;"> Non-ergodic convergence of learning dynamics in games is widely studied recently because of its importance in both theory and practice. Recent work (Cai et al., 2024) showed that a broad class of learning dynamics, including Optimistic Multiplicative Weights Update (OMWU), can exhibit arbitrarily slow last-iterate convergence even in simple $2 \times 2$ matrix games, despite many of these dynamics… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.02825v1-abstract-full').style.display = 'inline'; document.getElementById('2503.02825v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2503.02825v1-abstract-full" style="display: none;"> Non-ergodic convergence of learning dynamics in games is widely studied recently because of its importance in both theory and practice. Recent work (Cai et al., 2024) showed that a broad class of learning dynamics, including Optimistic Multiplicative Weights Update (OMWU), can exhibit arbitrarily slow last-iterate convergence even in simple $2 \times 2$ matrix games, despite many of these dynamics being known to converge asymptotically in the last iterate. It remains unclear, however, whether these algorithms achieve fast non-ergodic convergence under weaker criteria, such as best-iterate convergence. We show that for $2\times 2$ matrix games, OMWU achieves an $O(T^{-1/6})$ best-iterate convergence rate, in stark contrast to its slow last-iterate convergence in the same class of games. Furthermore, we establish a lower bound showing that OMWU does not achieve any polynomial random-iterate convergence rate, measured by the expected duality gaps across all iterates. This result challenges the conventional wisdom that random-iterate convergence is essentially equivalent to best-iterate convergence, with the former often used as a proxy for establishing the latter. Our analysis uncovers a new connection to dynamic regret and presents a novel two-phase approach to best-iterate convergence, which could be of independent interest. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.02825v1-abstract-full').style.display = 'none'; document.getElementById('2503.02825v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">33 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2503.02172">arXiv:2503.02172</a> <span> [<a href="https://arxiv.org/pdf/2503.02172">pdf</a>, <a href="https://arxiv.org/format/2503.02172">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> KGCompiler: Deep Learning Compilation Optimization for Knowledge Graph Complex Logical Query Answering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lin%2C+H">Hongyu Lin</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Haoran Luo</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+H">Hanghang Cao</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yang Liu</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+S">Shihao Gao</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+K">Kaichun Yao</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Libo Zhang</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+M">Mingjie Xing</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yanjun Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2503.02172v1-abstract-short" style="display: inline;"> Complex Logical Query Answering (CLQA) involves intricate multi-hop logical reasoning over large-scale and potentially incomplete Knowledge Graphs (KGs). Although existing CLQA algorithms achieve high accuracy in answering such queries, their reasoning time and memory usage scale significantly with the number of First-Order Logic (FOL) operators involved, creating serious challenges for practical… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.02172v1-abstract-full').style.display = 'inline'; document.getElementById('2503.02172v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2503.02172v1-abstract-full" style="display: none;"> Complex Logical Query Answering (CLQA) involves intricate multi-hop logical reasoning over large-scale and potentially incomplete Knowledge Graphs (KGs). Although existing CLQA algorithms achieve high accuracy in answering such queries, their reasoning time and memory usage scale significantly with the number of First-Order Logic (FOL) operators involved, creating serious challenges for practical deployment. In addition, current research primarily focuses on algorithm-level optimizations for CLQA tasks, often overlooking compiler-level optimizations, which can offer greater generality and scalability. To address these limitations, we introduce a Knowledge Graph Compiler, namely KGCompiler, the first deep learning compiler specifically designed for CLQA tasks. By incorporating KG-specific optimizations proposed in this paper, KGCompiler enhances the reasoning performance of CLQA algorithms without requiring additional manual modifications to their implementations. At the same time, it significantly reduces memory usage. Extensive experiments demonstrate that KGCompiler accelerates CLQA algorithms by factors ranging from 1.04x to 8.26x, with an average speedup of 3.71x. We also provide an interface to enable hands-on experience with KGCompiler. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.02172v1-abstract-full').style.display = 'none'; document.getElementById('2503.02172v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2503.01695">arXiv:2503.01695</a> <span> [<a href="https://arxiv.org/pdf/2503.01695">pdf</a>, <a href="https://arxiv.org/format/2503.01695">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Generate, Discriminate, Evolve: Enhancing Context Faithfulness via Fine-Grained Sentence-Level Self-Evolution </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+K">Kun Li</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+T">Tianhua Zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yunxiang Li</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Hongyin Luo</a>, <a href="/search/cs?searchtype=author&query=Moustafa%2C+A">Abdalla Moustafa</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+X">Xixin Wu</a>, <a href="/search/cs?searchtype=author&query=Glass%2C+J">James Glass</a>, <a href="/search/cs?searchtype=author&query=Meng%2C+H">Helen Meng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2503.01695v1-abstract-short" style="display: inline;"> Improving context faithfulness in large language models is essential for developing trustworthy retrieval augmented generation systems and mitigating hallucinations, especially in long-form question answering (LFQA) tasks or scenarios involving knowledge conflicts. Existing methods either intervene LLMs only at inference without addressing their inherent limitations or overlook the potential for s… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.01695v1-abstract-full').style.display = 'inline'; document.getElementById('2503.01695v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2503.01695v1-abstract-full" style="display: none;"> Improving context faithfulness in large language models is essential for developing trustworthy retrieval augmented generation systems and mitigating hallucinations, especially in long-form question answering (LFQA) tasks or scenarios involving knowledge conflicts. Existing methods either intervene LLMs only at inference without addressing their inherent limitations or overlook the potential for self-improvement. In this paper, we introduce GenDiE (Generate, Discriminate, Evolve), a novel self-evolving framework that enhances context faithfulness through fine-grained sentence-level optimization. GenDiE combines both generative and discriminative training, equipping LLMs with self-generation and self-scoring capabilities to facilitate iterative self-evolution. This supports both data construction for model alignment and score-guided search during inference. Furthermore, by treating each sentence in a response as an independent optimization unit, GenDiE effectively addresses the limitations of previous approaches that optimize at the holistic answer level, which may miss unfaithful details. Experiments on ASQA (in-domain LFQA) and ConFiQA (out-of-domain counterfactual QA) datasets demonstrate that GenDiE surpasses various baselines in both faithfulness and correctness, and exhibits robust performance for domain adaptation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.01695v1-abstract-full').style.display = 'none'; document.getElementById('2503.01695v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2503.00515">arXiv:2503.00515</a> <span> [<a href="https://arxiv.org/pdf/2503.00515">pdf</a>, <a href="https://arxiv.org/format/2503.00515">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Class-Independent Increment: An Efficient Approach for Multi-label Class-Incremental Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Dong%2C+S">Songlin Dong</a>, <a href="/search/cs?searchtype=author&query=He%2C+Y">Yuhang He</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Z">Zhengdong Zhou</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Haoyu Luo</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+X">Xing Wei</a>, <a href="/search/cs?searchtype=author&query=Kot%2C+A+C">Alex C. Kot</a>, <a href="/search/cs?searchtype=author&query=Gong%2C+Y">Yihong Gong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2503.00515v1-abstract-short" style="display: inline;"> Current research on class-incremental learning primarily focuses on single-label classification tasks. However, real-world applications often involve multi-label scenarios, such as image retrieval and medical imaging. Therefore, this paper focuses on the challenging yet practical multi-label class-incremental learning (MLCIL) problem. In addition to the challenge of catastrophic forgetting, MLCIL… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.00515v1-abstract-full').style.display = 'inline'; document.getElementById('2503.00515v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2503.00515v1-abstract-full" style="display: none;"> Current research on class-incremental learning primarily focuses on single-label classification tasks. However, real-world applications often involve multi-label scenarios, such as image retrieval and medical imaging. Therefore, this paper focuses on the challenging yet practical multi-label class-incremental learning (MLCIL) problem. In addition to the challenge of catastrophic forgetting, MLCIL encounters issues related to feature confusion, encompassing inter-session and intra-feature confusion. To address these problems, we propose a novel MLCIL approach called class-independent increment (CLIN). Specifically, in contrast to existing methods that extract image-level features, we propose a class-independent incremental network (CINet) to extract multiple class-level embeddings for multi-label samples. It learns and preserves the knowledge of different classes by constructing class-specific tokens. On this basis, we develop two novel loss functions, optimizing the learning of class-specific tokens and class-level embeddings, respectively. These losses aim to distinguish between new and old classes, further alleviating the problem of feature confusion. Extensive experiments on MS-COCO and PASCAL VOC datasets demonstrate the effectiveness of our method for improving recognition performance and mitigating forgetting on various MLCIL tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.00515v1-abstract-full').style.display = 'none'; document.getElementById('2503.00515v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.20771">arXiv:2502.20771</a> <span> [<a href="https://arxiv.org/pdf/2502.20771">pdf</a>, <a href="https://arxiv.org/format/2502.20771">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> CSubBT: A Self-Adjusting Execution Framework for Mobile Manipulation System </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Guo%2C+H">Huihui Guo</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Huizhang Luo</a>, <a href="/search/cs?searchtype=author&query=Pi%2C+H">Huilong Pi</a>, <a href="/search/cs?searchtype=author&query=Duan%2C+M">Mingxing Duan</a>, <a href="/search/cs?searchtype=author&query=Li%2C+K">Kenli Li</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+C">Chubo Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.20771v1-abstract-short" style="display: inline;"> With the advancements in modern intelligent technologies, mobile robots equipped with manipulators are increasingly operating in unstructured environments. These robots can plan sequences of actions for long-horizon tasks based on perceived information. However, in practice, the planned actions often fail due to discrepancies between the perceptual information used for planning and the actual cond… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.20771v1-abstract-full').style.display = 'inline'; document.getElementById('2502.20771v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.20771v1-abstract-full" style="display: none;"> With the advancements in modern intelligent technologies, mobile robots equipped with manipulators are increasingly operating in unstructured environments. These robots can plan sequences of actions for long-horizon tasks based on perceived information. However, in practice, the planned actions often fail due to discrepancies between the perceptual information used for planning and the actual conditions. In this paper, we introduce the {\itshape Conditional Subtree} (CSubBT), a general self-adjusting execution framework for mobile manipulation tasks based on Behavior Trees (BTs). CSubBT decomposes symbolic action into sub-actions and uses BTs to control their execution, addressing any potential anomalies during the process. CSubBT treats common anomalies as constraint non-satisfaction problems and continuously guides the robot in performing tasks by sampling new action parameters in the constraint space when anomalies are detected. We demonstrate the robustness of our framework through extensive manipulation experiments on different platforms, both in simulation and real-world settings. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.20771v1-abstract-full').style.display = 'none'; document.getElementById('2502.20771v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This work has been submitted to the IEEE for possible publication</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.17625">arXiv:2502.17625</a> <span> [<a href="https://arxiv.org/pdf/2502.17625">pdf</a>, <a href="https://arxiv.org/format/2502.17625">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> </div> </div> <p class="title is-5 mathjax"> Instance-Dependent Regret Bounds for Learning Two-Player Zero-Sum Games with Bandit Feedback </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ito%2C+S">Shinji Ito</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Haipeng Luo</a>, <a href="/search/cs?searchtype=author&query=Tsuchiya%2C+T">Taira Tsuchiya</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yue Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.17625v1-abstract-short" style="display: inline;"> No-regret self-play learning dynamics have become one of the premier ways to solve large-scale games in practice. Accelerating their convergence via improving the regret of the players over the naive $O(\sqrt{T})$ bound after $T$ rounds has been extensively studied in recent years, but almost all studies assume access to exact gradient feedback. We address the question of whether acceleration is p… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.17625v1-abstract-full').style.display = 'inline'; document.getElementById('2502.17625v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.17625v1-abstract-full" style="display: none;"> No-regret self-play learning dynamics have become one of the premier ways to solve large-scale games in practice. Accelerating their convergence via improving the regret of the players over the naive $O(\sqrt{T})$ bound after $T$ rounds has been extensively studied in recent years, but almost all studies assume access to exact gradient feedback. We address the question of whether acceleration is possible under bandit feedback only and provide an affirmative answer for two-player zero-sum normal-form games. Specifically, we show that if both players apply the Tsallis-INF algorithm of Zimmert and Seldin (2018, arXiv:1807.07623), then their regret is at most $O(c_1 \log T + \sqrt{c_2 T})$, where $c_1$ and $c_2$ are game-dependent constants that characterize the difficulty of learning -- $c_1$ resembles the complexity of learning a stochastic multi-armed bandit instance and depends inversely on some gap measures, while $c_2$ can be much smaller than the number of actions when the Nash equilibria have a small support or are close to the boundary. In particular, for the case when a pure strategy Nash equilibrium exists, $c_2$ becomes zero, leading to an optimal instance-dependent regret bound as we show. We additionally prove that in this case, our algorithm also enjoys last-iterate convergence and can identify the pure strategy Nash equilibrium with near-optimal sample complexity. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.17625v1-abstract-full').style.display = 'none'; document.getElementById('2502.17625v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 91A26 (Primary) 68T05; 68Q32 (Secondary) <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> F.2.0; I.2.11; G.3 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.16387">arXiv:2502.16387</a> <span> [<a href="https://arxiv.org/pdf/2502.16387">pdf</a>, <a href="https://arxiv.org/ps/2502.16387">ps</a>, <a href="https://arxiv.org/format/2502.16387">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> </div> </div> <p class="title is-5 mathjax"> Simultaneous Swap Regret Minimization via KL-Calibration </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Luo%2C+H">Haipeng Luo</a>, <a href="/search/cs?searchtype=author&query=Senapati%2C+S">Spandan Senapati</a>, <a href="/search/cs?searchtype=author&query=Sharan%2C+V">Vatsal Sharan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.16387v1-abstract-short" style="display: inline;"> Calibration is a fundamental concept that aims at ensuring the reliability of probabilistic predictions by aligning them with real-world outcomes. There is a surge of studies on new calibration measures that are easier to optimize compared to the classical $\ell_1$-Calibration while still having strong implications for downstream applications. One recent such example is the work by Fishelson et al… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.16387v1-abstract-full').style.display = 'inline'; document.getElementById('2502.16387v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.16387v1-abstract-full" style="display: none;"> Calibration is a fundamental concept that aims at ensuring the reliability of probabilistic predictions by aligning them with real-world outcomes. There is a surge of studies on new calibration measures that are easier to optimize compared to the classical $\ell_1$-Calibration while still having strong implications for downstream applications. One recent such example is the work by Fishelson et al. (2025) who show that it is possible to achieve $O(T^{1/3})$ pseudo $\ell_2$-Calibration error via minimizing pseudo swap regret of the squared loss, which in fact implies the same bound for all bounded proper losses with a smooth univariate form. In this work, we significantly generalize their result in the following ways: (a) in addition to smooth univariate forms, our algorithm also simultaneously achieves $O(T^{1/3})$ swap regret for any proper loss with a twice continuously differentiable univariate form (such as Tsallis entropy); (b) our bounds hold not only for pseudo swap regret that measures losses using the forecaster's distributions on predictions, but also hold for the actual swap regret that measures losses using the forecaster's actual realized predictions. We achieve so by introducing a new stronger notion of calibration called (pseudo) KL-Calibration, which we show is equivalent to the (pseudo) swap regret for log loss. We prove that there exists an algorithm that achieves $O(T^{1/3})$ KL-Calibration error and provide an explicit algorithm that achieves $O(T^{1/3})$ pseudo KL-Calibration error. Moreover, we show that the same algorithm achieves $O(T^{1/3}(\log T)^{-1/3}\log(T/未))$ swap regret w.p. $\ge 1-未$ for any proper loss with a smooth univariate form, which implies $O(T^{1/3})$ $\ell_2$-Calibration error. A technical contribution of our work is a new randomized rounding procedure and a non-uniform discretization scheme to minimize the swap regret for log loss. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.16387v1-abstract-full').style.display = 'none'; document.getElementById('2502.16387v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.14768">arXiv:2502.14768</a> <span> [<a href="https://arxiv.org/pdf/2502.14768">pdf</a>, <a href="https://arxiv.org/format/2502.14768">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Logic-RL: Unleashing LLM Reasoning with Rule-Based Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tian Xie</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+Z">Zitian Gao</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+Q">Qingnan Ren</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Haoming Luo</a>, <a href="/search/cs?searchtype=author&query=Hong%2C+Y">Yuqian Hong</a>, <a href="/search/cs?searchtype=author&query=Dai%2C+B">Bryan Dai</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+J">Joey Zhou</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+K">Kai Qiu</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z">Zhirong Wu</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+C">Chong Luo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.14768v1-abstract-short" style="display: inline;"> Inspired by the success of DeepSeek-R1, we explore the potential of rule-based reinforcement learning (RL) in large reasoning models. To analyze reasoning dynamics, we use synthetic logic puzzles as training data due to their controllable complexity and straightforward answer verification. We make some key technical contributions that lead to effective and stable RL training: a system prompt that… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.14768v1-abstract-full').style.display = 'inline'; document.getElementById('2502.14768v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.14768v1-abstract-full" style="display: none;"> Inspired by the success of DeepSeek-R1, we explore the potential of rule-based reinforcement learning (RL) in large reasoning models. To analyze reasoning dynamics, we use synthetic logic puzzles as training data due to their controllable complexity and straightforward answer verification. We make some key technical contributions that lead to effective and stable RL training: a system prompt that emphasizes the thinking and answering process, a stringent format reward function that penalizes outputs for taking shortcuts, and a straightforward training recipe that achieves stable convergence. Our 7B model develops advanced reasoning skills-such as reflection, verification, and summarization-that are absent from the logic corpus. Remarkably, after training on just 5K logic problems, it demonstrates generalization abilities to the challenging math benchmarks AIME and AMC. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.14768v1-abstract-full').style.display = 'none'; document.getElementById('2502.14768v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.13075">arXiv:2502.13075</a> <span> [<a href="https://arxiv.org/pdf/2502.13075">pdf</a>, <a href="https://arxiv.org/format/2502.13075">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> Variable Read Disturbance: An Experimental Analysis of Temporal Variation in DRAM Read Disturbance </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Olgun%2C+A">Ataberk Olgun</a>, <a href="/search/cs?searchtype=author&query=Bostanci%2C+F+N">F. Nisa Bostanci</a>, <a href="/search/cs?searchtype=author&query=Yuksel%2C+I+E">Ismail Emir Yuksel</a>, <a href="/search/cs?searchtype=author&query=Canpolat%2C+O">Oguzhan Canpolat</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Haocong Luo</a>, <a href="/search/cs?searchtype=author&query=Oliveira%2C+G+F">Geraldo F. Oliveira</a>, <a href="/search/cs?searchtype=author&query=Yaglikci%2C+A+G">A. Giray Yaglikci</a>, <a href="/search/cs?searchtype=author&query=Patel%2C+M">Minesh Patel</a>, <a href="/search/cs?searchtype=author&query=Mutlu%2C+O">Onur Mutlu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.13075v1-abstract-short" style="display: inline;"> Modern DRAM chips are subject to read disturbance errors. State-of-the-art read disturbance mitigations rely on accurate and exhaustive characterization of the read disturbance threshold (RDT) (e.g., the number of aggressor row activations needed to induce the first RowHammer or RowPress bitflip) of every DRAM row (of which there are millions or billions in a modern system) to prevent read disturb… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13075v1-abstract-full').style.display = 'inline'; document.getElementById('2502.13075v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.13075v1-abstract-full" style="display: none;"> Modern DRAM chips are subject to read disturbance errors. State-of-the-art read disturbance mitigations rely on accurate and exhaustive characterization of the read disturbance threshold (RDT) (e.g., the number of aggressor row activations needed to induce the first RowHammer or RowPress bitflip) of every DRAM row (of which there are millions or billions in a modern system) to prevent read disturbance bitflips securely and with low overhead. We experimentally demonstrate for the first time that the RDT of a DRAM row significantly and unpredictably changes over time. We call this new phenomenon variable read disturbance (VRD). Our experiments using 160 DDR4 chips and 4 HBM2 chips from three major manufacturers yield two key observations. First, it is very unlikely that relatively few RDT measurements can accurately identify the RDT of a DRAM row. The minimum RDT of a DRAM row appears after tens of thousands of measurements (e.g., up to 94,467), and the minimum RDT of a DRAM row is 3.5X smaller than the maximum RDT observed for that row. Second, the probability of accurately identifying a row's RDT with a relatively small number of measurements reduces with increasing chip density or smaller technology node size. Our empirical results have implications for the security guarantees of read disturbance mitigation techniques: if the RDT of a DRAM row is not identified accurately, these techniques can easily become insecure. We discuss and evaluate using a guardband for RDT and error-correcting codes for mitigating read disturbance bitflips in the presence of RDTs that change unpredictably over time. We conclude that a >10% guardband for the minimum observed RDT combined with SECDED or Chipkill-like SSC error-correcting codes could prevent read disturbance bitflips at the cost of large read disturbance mitigation performance overheads (e.g., 45% performance loss for an RDT guardband of 50%). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13075v1-abstract-full').style.display = 'none'; document.getElementById('2502.13075v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Extended version of our publication at the 31st IEEE International Symposium on High-Performance Computer Architecture (HPCA-31), 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12999">arXiv:2502.12999</a> <span> [<a href="https://arxiv.org/pdf/2502.12999">pdf</a>, <a href="https://arxiv.org/format/2502.12999">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Statistics Theory">math.ST</span> </div> </div> <p class="title is-5 mathjax"> Asymptotic Optimism of Random-Design Linear and Kernel Regression Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Luo%2C+H">Hengrui Luo</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Y">Yunzhang Zhu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.12999v1-abstract-short" style="display: inline;"> We derived the closed-form asymptotic optimism of linear regression models under random designs, and generalizes it to kernel ridge regression. Using scaled asymptotic optimism as a generic predictive model complexity measure, we studied the fundamental different behaviors of linear regression model, tangent kernel (NTK) regression model and three-layer fully connected neural networks (NN). Our co… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12999v1-abstract-full').style.display = 'inline'; document.getElementById('2502.12999v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.12999v1-abstract-full" style="display: none;"> We derived the closed-form asymptotic optimism of linear regression models under random designs, and generalizes it to kernel ridge regression. Using scaled asymptotic optimism as a generic predictive model complexity measure, we studied the fundamental different behaviors of linear regression model, tangent kernel (NTK) regression model and three-layer fully connected neural networks (NN). Our contribution is two-fold: we provided theoretical ground for using scaled optimism as a model predictive complexity measure; and we show empirically that NN with ReLUs behaves differently from kernel models under this measure. With resampling techniques, we can also compute the optimism for regression models with real data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12999v1-abstract-full').style.display = 'none'; document.getElementById('2502.12999v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">56 pages;</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 68T05; 68Q32 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12963">arXiv:2502.12963</a> <span> [<a href="https://arxiv.org/pdf/2502.12963">pdf</a>, <a href="https://arxiv.org/format/2502.12963">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> D3-ARM: High-Dynamic, Dexterous and Fully Decoupled Cable-driven Robotic Arm </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Luo%2C+H">Hong Luo</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+J">Jianle Xu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+S">Shoujie Li</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+H">Huayue Liang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yanbo Chen</a>, <a href="/search/cs?searchtype=author&query=Xia%2C+C">Chongkun Xia</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xueqian Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.12963v1-abstract-short" style="display: inline;"> Cable transmission enables motors of robotic arm to operate lightweight and low-inertia joints remotely in various environments, but it also creates issues with motion coupling and cable routing that can reduce arm's control precision and performance. In this paper, we present a novel motion decoupling mechanism with low-friction to align the cables and efficiently transmit the motor's power. By a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12963v1-abstract-full').style.display = 'inline'; document.getElementById('2502.12963v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.12963v1-abstract-full" style="display: none;"> Cable transmission enables motors of robotic arm to operate lightweight and low-inertia joints remotely in various environments, but it also creates issues with motion coupling and cable routing that can reduce arm's control precision and performance. In this paper, we present a novel motion decoupling mechanism with low-friction to align the cables and efficiently transmit the motor's power. By arranging these mechanisms at the joints, we fabricate a fully decoupled and lightweight cable-driven robotic arm called D3-Arm with all the electrical components be placed at the base. Its 776 mm length moving part boasts six degrees of freedom (DOF) and only 1.6 kg weights. To address the issue of cable slack, a cable-pretension mechanism is integrated to enhance the stability of long-distance cable transmission. Through a series of comprehensive tests, D3-Arm demonstrated 1.29 mm average positioning error and 2.0 kg payload capacity, proving the practicality of the proposed decoupling mechanisms in cable-driven robotic arm. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12963v1-abstract-full').style.display = 'none'; document.getElementById('2502.12963v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12650">arXiv:2502.12650</a> <span> [<a href="https://arxiv.org/pdf/2502.12650">pdf</a>, <a href="https://arxiv.org/format/2502.12650">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> </div> </div> <p class="title is-5 mathjax"> Chronus: Understanding and Securing the Cutting-Edge Industry Solutions to DRAM Read Disturbance </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Canpolat%2C+O">O臒uzhan Canpolat</a>, <a href="/search/cs?searchtype=author&query=Ya%C4%9Fl%C4%B1k%C3%A7%C4%B1%2C+A+G">A. Giray Ya臒l谋k莽谋</a>, <a href="/search/cs?searchtype=author&query=Oliveira%2C+G+F">Geraldo F. Oliveira</a>, <a href="/search/cs?searchtype=author&query=Olgun%2C+A">Ataberk Olgun</a>, <a href="/search/cs?searchtype=author&query=Bostanc%C4%B1%2C+N">Nisa Bostanc谋</a>, <a href="/search/cs?searchtype=author&query=Y%C3%BCksel%2C+%C4%B0+E">陌smail Emir Y眉ksel</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Haocong Luo</a>, <a href="/search/cs?searchtype=author&query=Ergin%2C+O">O臒uz Ergin</a>, <a href="/search/cs?searchtype=author&query=Mutlu%2C+O">Onur Mutlu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.12650v1-abstract-short" style="display: inline;"> We 1) present the first rigorous security, performance, energy, and cost analyses of the state-of-the-art on-DRAM-die read disturbance mitigation method, Per Row Activation Counting (PRAC) and 2) propose Chronus, a new mechanism that addresses PRAC's two major weaknesses. Our analysis shows that PRAC's system performance overhead on benign applications is non-negligible for modern DRAM chips and p… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12650v1-abstract-full').style.display = 'inline'; document.getElementById('2502.12650v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.12650v1-abstract-full" style="display: none;"> We 1) present the first rigorous security, performance, energy, and cost analyses of the state-of-the-art on-DRAM-die read disturbance mitigation method, Per Row Activation Counting (PRAC) and 2) propose Chronus, a new mechanism that addresses PRAC's two major weaknesses. Our analysis shows that PRAC's system performance overhead on benign applications is non-negligible for modern DRAM chips and prohibitively large for future DRAM chips that are more vulnerable to read disturbance. We identify two weaknesses of PRAC that cause these overheads. First, PRAC increases critical DRAM access latency parameters due to the additional time required to increment activation counters. Second, PRAC performs a constant number of preventive refreshes at a time, making it vulnerable to an adversarial access pattern, known as the wave attack, and consequently requiring it to be configured for significantly smaller activation thresholds. To address PRAC's two weaknesses, we propose a new on-DRAM-die RowHammer mitigation mechanism, Chronus. Chronus 1) updates row activation counters concurrently while serving accesses by separating counters from the data and 2) prevents the wave attack by dynamically controlling the number of preventive refreshes performed. Our performance analysis shows that Chronus's system performance overhead is near-zero for modern DRAM chips and very low for future DRAM chips. Chronus outperforms three variants of PRAC and three other state-of-the-art read disturbance solutions. We discuss Chronus's and PRAC's implications for future systems and foreshadow future research directions. To aid future research, we open-source our Chronus implementation at https://github.com/CMU-SAFARI/Chronus. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12650v1-abstract-full').style.display = 'none'; document.getElementById('2502.12650v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear in HPCA'25. arXiv admin note: text overlap with arXiv:2406.19094</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12568">arXiv:2502.12568</a> <span> [<a href="https://arxiv.org/pdf/2502.12568">pdf</a>, <a href="https://arxiv.org/format/2502.12568">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> A Cognitive Writing Perspective for Constrained Long-Form Text Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wan%2C+K">Kaiyang Wan</a>, <a href="/search/cs?searchtype=author&query=Mu%2C+H">Honglin Mu</a>, <a href="/search/cs?searchtype=author&query=Hao%2C+R">Rui Hao</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Haoran Luo</a>, <a href="/search/cs?searchtype=author&query=Gu%2C+T">Tianle Gu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xiuying Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.12568v2-abstract-short" style="display: inline;"> Like humans, Large Language Models (LLMs) struggle to generate high-quality long-form text that adheres to strict requirements in a single pass. This challenge is unsurprising, as successful human writing, according to the Cognitive Writing Theory, is a complex cognitive process involving iterative planning, translating, reviewing, and monitoring. Motivated by these cognitive principles, we aim to… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12568v2-abstract-full').style.display = 'inline'; document.getElementById('2502.12568v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.12568v2-abstract-full" style="display: none;"> Like humans, Large Language Models (LLMs) struggle to generate high-quality long-form text that adheres to strict requirements in a single pass. This challenge is unsurprising, as successful human writing, according to the Cognitive Writing Theory, is a complex cognitive process involving iterative planning, translating, reviewing, and monitoring. Motivated by these cognitive principles, we aim to equip LLMs with human-like cognitive writing capabilities through CogWriter, a novel training-free framework that transforms LLM constrained long-form text generation into a systematic cognitive writing paradigm. Our framework consists of two key modules: (1) a Planning Agent that performs hierarchical planning to decompose the task, and (2) multiple Generation Agents that execute these plans in parallel. The system maintains quality via continuous monitoring and reviewing mechanisms, which evaluate outputs against specified requirements and trigger necessary revisions. CogWriter demonstrates exceptional performance on LongGenBench, a benchmark for complex constrained long-form text generation. Even when using Qwen-2.5-14B as its backbone, CogWriter surpasses GPT-4o by 22% in complex instruction completion accuracy while reliably generating texts exceeding 10,000 words. We hope this cognitive science-inspired approach provides a paradigm for LLM writing advancements: \href{https://github.com/KaiyangWan/CogWriter}{CogWriter}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12568v2-abstract-full').style.display = 'none'; document.getElementById('2502.12568v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages, 6 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12529">arXiv:2502.12529</a> <span> [<a href="https://arxiv.org/pdf/2502.12529">pdf</a>, <a href="https://arxiv.org/ps/2502.12529">ps</a>, <a href="https://arxiv.org/format/2502.12529">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Alternating Regret for Online Convex Optimization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hait%2C+S">Soumita Hait</a>, <a href="/search/cs?searchtype=author&query=Li%2C+P">Ping Li</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Haipeng Luo</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+M">Mengxiao Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.12529v1-abstract-short" style="display: inline;"> Motivated by alternating learning dynamics in two-player games, a recent work by Cevher et al.(2024) shows that $o(\sqrt{T})$ alternating regret is possible for any $T$-round adversarial Online Linear Optimization (OLO) problem, and left as an open question whether the same is true for general Online Convex Optimization (OCO). We answer this question in the affirmative by showing that the continuo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12529v1-abstract-full').style.display = 'inline'; document.getElementById('2502.12529v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.12529v1-abstract-full" style="display: none;"> Motivated by alternating learning dynamics in two-player games, a recent work by Cevher et al.(2024) shows that $o(\sqrt{T})$ alternating regret is possible for any $T$-round adversarial Online Linear Optimization (OLO) problem, and left as an open question whether the same is true for general Online Convex Optimization (OCO). We answer this question in the affirmative by showing that the continuous Hedge algorithm achieves $\tilde{\mathcal{O}}(d^{\frac{2}{3}}T^{\frac{1}{3}})$ alternating regret for any adversarial $d$-dimensional OCO problems. We show that this implies an alternating learning dynamic that finds a Nash equilibrium for any convex-concave zero-sum games or a coarse correlated equilibrium for any convex two-player general-sum games at a rate of $\tilde{\mathcal{O}}(d^{\frac{2}{3}}/T^{\frac{2}{3}})$. To further improve the time complexity and/or the dimension dependence, we propose another simple algorithm, Follow-the-Regularized-Leader with a regularizer whose convex conjugate is 3rd-order smooth, for OCO with smooth and self-concordant loss functions (such as linear or quadratic losses). We instantiate our algorithm with different regularizers and show that, for example, when the decision set is the $\ell_2$ ball, our algorithm achieves $\tilde{\mathcal{O}}(T^{\frac{2}{5}})$ alternating regret with no dimension dependence (and a better $\tilde{\mathcal{O}}(T^{\frac{1}{3}})$ bound for quadratic losses). We complement our results by showing some algorithm-specific alternating regret lower bounds, including a somewhat surprising $惟(\sqrt{T})$ lower bound for a Regret Matching variant that is widely used in alternating learning dynamics. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12529v1-abstract-full').style.display = 'none'; document.getElementById('2502.12529v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12528">arXiv:2502.12528</a> <span> [<a href="https://arxiv.org/pdf/2502.12528">pdf</a>, <a href="https://arxiv.org/format/2502.12528">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Contextual Linear Bandits with Delay as Payoff </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+M">Mengxiao Zhang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yingfei Wang</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Haipeng Luo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.12528v2-abstract-short" style="display: inline;"> A recent work by Schlisselberg et al. (2024) studies a delay-as-payoff model for stochastic multi-armed bandits, where the payoff (either loss or reward) is delayed for a period that is proportional to the payoff itself. While this captures many real-world applications, the simple multi-armed bandit setting limits the practicality of their results. In this paper, we address this limitation by stud… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12528v2-abstract-full').style.display = 'inline'; document.getElementById('2502.12528v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.12528v2-abstract-full" style="display: none;"> A recent work by Schlisselberg et al. (2024) studies a delay-as-payoff model for stochastic multi-armed bandits, where the payoff (either loss or reward) is delayed for a period that is proportional to the payoff itself. While this captures many real-world applications, the simple multi-armed bandit setting limits the practicality of their results. In this paper, we address this limitation by studying the delay-as-payoff model for contextual linear bandits. Specifically, we start from the case with a fixed action set and propose an efficient algorithm whose regret overhead compared to the standard no-delay case is at most $D螖_{\max}\log T$, where $T$ is the total horizon, $D$ is the maximum delay, and $螖_{\max}$ is the maximum suboptimality gap. When payoff is loss, we also show further improvement of the bound, demonstrating a separation between reward and loss similar to Schlisselberg et al. (2024). Contrary to standard linear bandit algorithms that construct least squares estimator and confidence ellipsoid, the main novelty of our algorithm is to apply a phased arm elimination procedure by only picking actions in a volumetric spanner of the action set, which addresses challenges arising from both payoff-dependent delays and large action sets. We further extend our results to the case with varying action sets by adopting the reduction from Hanna et al. (2023). Finally, we implement our algorithm and showcase its effectiveness and superior performance in experiments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12528v2-abstract-full').style.display = 'none'; document.getElementById('2502.12528v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11741">arXiv:2502.11741</a> <span> [<a href="https://arxiv.org/pdf/2502.11741">pdf</a>, <a href="https://arxiv.org/format/2502.11741">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Databases">cs.DB</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> SQL-o1: A Self-Reward Heuristic Dynamic Search Method for Text-to-SQL </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lyu%2C+S">Shuai Lyu</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Haoran Luo</a>, <a href="/search/cs?searchtype=author&query=Ou%2C+Z">Zhonghong Ou</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Y">Yifan Zhu</a>, <a href="/search/cs?searchtype=author&query=Shang%2C+X">Xiaoran Shang</a>, <a href="/search/cs?searchtype=author&query=Qin%2C+Y">Yang Qin</a>, <a href="/search/cs?searchtype=author&query=Song%2C+M">Meina Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.11741v1-abstract-short" style="display: inline;"> The Text-to-SQL(Text2SQL) task aims to convert natural language queries into executable SQL queries. Thanks to the application of large language models (LLMs), significant progress has been made in this field. However, challenges such as model scalability, limited generation space, and coherence issues in SQL generation still persist. To address these issues, we propose SQL-o1, a Self-Reward-based… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11741v1-abstract-full').style.display = 'inline'; document.getElementById('2502.11741v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11741v1-abstract-full" style="display: none;"> The Text-to-SQL(Text2SQL) task aims to convert natural language queries into executable SQL queries. Thanks to the application of large language models (LLMs), significant progress has been made in this field. However, challenges such as model scalability, limited generation space, and coherence issues in SQL generation still persist. To address these issues, we propose SQL-o1, a Self-Reward-based heuristic search method designed to enhance the reasoning ability of LLMs in SQL query generation. SQL-o1 combines Monte Carlo Tree Search (MCTS) for heuristic process-level search and constructs a Schema-Aware dataset to help the model better understand database schemas. Extensive experiments on the Bird and Spider datasets demonstrate that SQL-o1 improves execution accuracy by 10.8\% on the complex Bird dataset compared to the latest baseline methods, even outperforming GPT-4-based approaches. Additionally, SQL-o1 excels in few-shot learning scenarios and shows strong cross-model transferability. Our code is publicly available at:https://github.com/ShuaiLyu0110/SQL-o1. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11741v1-abstract-full').style.display = 'none'; document.getElementById('2502.11741v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages,4 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.02414">arXiv:2502.02414</a> <span> [<a href="https://arxiv.org/pdf/2502.02414">pdf</a>, <a href="https://arxiv.org/format/2502.02414">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Transolver++: An Accurate Neural Solver for PDEs on Million-Scale Geometries </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Luo%2C+H">Huakun Luo</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+H">Haixu Wu</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+H">Hang Zhou</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+L">Lanxiang Xing</a>, <a href="/search/cs?searchtype=author&query=Di%2C+Y">Yichen Di</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jianmin Wang</a>, <a href="/search/cs?searchtype=author&query=Long%2C+M">Mingsheng Long</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.02414v2-abstract-short" style="display: inline;"> Although deep models have been widely explored in solving partial differential equations (PDEs), previous works are primarily limited to data only with up to tens of thousands of mesh points, far from the million-point scale required by industrial simulations that involve complex geometries. In the spirit of advancing neural PDE solvers to real industrial applications, we present Transolver++, a h… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02414v2-abstract-full').style.display = 'inline'; document.getElementById('2502.02414v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.02414v2-abstract-full" style="display: none;"> Although deep models have been widely explored in solving partial differential equations (PDEs), previous works are primarily limited to data only with up to tens of thousands of mesh points, far from the million-point scale required by industrial simulations that involve complex geometries. In the spirit of advancing neural PDE solvers to real industrial applications, we present Transolver++, a highly parallel and efficient neural solver that can accurately solve PDEs on million-scale geometries. Building upon previous advancements in solving PDEs by learning physical states via Transolver, Transolver++ is further equipped with an extremely optimized parallelism framework and a local adaptive mechanism to efficiently capture eidetic physical states from massive mesh points, successfully tackling the thorny challenges in computation and physics learning when scaling up input mesh size. Transolver++ increases the single-GPU input capacity to million-scale points for the first time and is capable of continuously scaling input size in linear complexity by increasing GPUs. Experimentally, Transolver++ yields 13% relative promotion across six standard PDE benchmarks and achieves over 20% performance gain in million-scale high-fidelity industrial simulations, whose sizes are 100$\times$ larger than previous benchmarks, covering car and 3D aircraft designs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02414v2-abstract-full').style.display = 'none'; document.getElementById('2502.02414v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.02021">arXiv:2502.02021</a> <span> [<a href="https://arxiv.org/pdf/2502.02021">pdf</a>, <a href="https://arxiv.org/format/2502.02021">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Multi-illuminant Color Constancy via Multi-scale Illuminant Estimation and Fusion </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Luo%2C+H">Hang Luo</a>, <a href="/search/cs?searchtype=author&query=Li%2C+R">Rongwei Li</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+J">Jinxing Liang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.02021v1-abstract-short" style="display: inline;"> Multi-illuminant color constancy methods aim to eliminate local color casts within an image through pixel-wise illuminant estimation. Existing methods mainly employ deep learning to establish a direct mapping between an image and its illumination map, which neglects the impact of image scales. To alleviate this problem, we represent an illuminant map as the linear combination of components estimat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02021v1-abstract-full').style.display = 'inline'; document.getElementById('2502.02021v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.02021v1-abstract-full" style="display: none;"> Multi-illuminant color constancy methods aim to eliminate local color casts within an image through pixel-wise illuminant estimation. Existing methods mainly employ deep learning to establish a direct mapping between an image and its illumination map, which neglects the impact of image scales. To alleviate this problem, we represent an illuminant map as the linear combination of components estimated from multi-scale images. Furthermore, we propose a tri-branch convolution networks to estimate multi-grained illuminant distribution maps from multi-scale images. These multi-grained illuminant maps are merged adaptively with an attentional illuminant fusion module. Through comprehensive experimental analysis and evaluation, the results demonstrate the effectiveness of our method, and it has achieved state-of-the-art performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02021v1-abstract-full').style.display = 'none'; document.getElementById('2502.02021v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 4 figures, this manuscript is under the consideration of Optics Express</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00695">arXiv:2502.00695</a> <span> [<a href="https://arxiv.org/pdf/2502.00695">pdf</a>, <a href="https://arxiv.org/format/2502.00695">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> TMI-CLNet: Triple-Modal Interaction Network for Chronic Liver Disease Prognosis From Imaging, Clinical, and Radiomic Data Fusion </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wu%2C+L">Linglong Wu</a>, <a href="/search/cs?searchtype=author&query=Shan%2C+X">Xuhao Shan</a>, <a href="/search/cs?searchtype=author&query=Ge%2C+R">Ruiquan Ge</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+R">Ruoyu Liang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+C">Chi Zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yonghong Li</a>, <a href="/search/cs?searchtype=author&query=Elazab%2C+A">Ahmed Elazab</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Huoling Luo</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yunbi Liu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+C">Changmiao Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.00695v1-abstract-short" style="display: inline;"> Chronic liver disease represents a significant health challenge worldwide and accurate prognostic evaluations are essential for personalized treatment plans. Recent evidence suggests that integrating multimodal data, such as computed tomography imaging, radiomic features, and clinical information, can provide more comprehensive prognostic information. However, modalities have an inherent heterogen… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00695v1-abstract-full').style.display = 'inline'; document.getElementById('2502.00695v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.00695v1-abstract-full" style="display: none;"> Chronic liver disease represents a significant health challenge worldwide and accurate prognostic evaluations are essential for personalized treatment plans. Recent evidence suggests that integrating multimodal data, such as computed tomography imaging, radiomic features, and clinical information, can provide more comprehensive prognostic information. However, modalities have an inherent heterogeneity, and incorporating additional modalities may exacerbate the challenges of heterogeneous data fusion. Moreover, existing multimodal fusion methods often struggle to adapt to richer medical modalities, making it difficult to capture inter-modal relationships. To overcome these limitations, We present the Triple-Modal Interaction Chronic Liver Network (TMI-CLNet). Specifically, we develop an Intra-Modality Aggregation module and a Triple-Modal Cross-Attention Fusion module, which are designed to eliminate intra-modality redundancy and extract cross-modal information, respectively. Furthermore, we design a Triple-Modal Feature Fusion loss function to align feature representations across modalities. Extensive experiments on the liver prognosis dataset demonstrate that our approach significantly outperforms existing state-of-the-art unimodal models and other multi-modal techniques. Our code is available at https://github.com/Mysterwll/liver.git. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00695v1-abstract-full').style.display = 'none'; document.getElementById('2502.00695v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, 3 figures, accepted by IEEE ISBI 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00354">arXiv:2502.00354</a> <span> [<a href="https://arxiv.org/pdf/2502.00354">pdf</a>, <a href="https://arxiv.org/format/2502.00354">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3696410.3714561">10.1145/3696410.3714561 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> PM-MOE: Mixture of Experts on Private Model Parameters for Personalized Federated Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Feng%2C+Y">Yu Feng</a>, <a href="/search/cs?searchtype=author&query=Geng%2C+Y">Yangli-ao Geng</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Y">Yifan Zhu</a>, <a href="/search/cs?searchtype=author&query=Han%2C+Z">Zongfu Han</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+X">Xie Yu</a>, <a href="/search/cs?searchtype=author&query=Xue%2C+K">Kaiwen Xue</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Haoran Luo</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+M">Mengyang Sun</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+G">Guangwei Zhang</a>, <a href="/search/cs?searchtype=author&query=Song%2C+M">Meina Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.00354v1-abstract-short" style="display: inline;"> Federated learning (FL) has gained widespread attention for its privacy-preserving and collaborative learning capabilities. Due to significant statistical heterogeneity, traditional FL struggles to generalize a shared model across diverse data domains. Personalized federated learning addresses this issue by dividing the model into a globally shared part and a locally private part, with the local m… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00354v1-abstract-full').style.display = 'inline'; document.getElementById('2502.00354v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.00354v1-abstract-full" style="display: none;"> Federated learning (FL) has gained widespread attention for its privacy-preserving and collaborative learning capabilities. Due to significant statistical heterogeneity, traditional FL struggles to generalize a shared model across diverse data domains. Personalized federated learning addresses this issue by dividing the model into a globally shared part and a locally private part, with the local model correcting representation biases introduced by the global model. Nevertheless, locally converged parameters more accurately capture domain-specific knowledge, and current methods overlook the potential benefits of these parameters. To address these limitations, we propose PM-MoE architecture. This architecture integrates a mixture of personalized modules and an energy-based personalized modules denoising, enabling each client to select beneficial personalized parameters from other clients. We applied the PM-MoE architecture to nine recent model-split-based personalized federated learning algorithms, achieving performance improvements with minimal additional training. Extensive experiments on six widely adopted datasets and two heterogeneity settings validate the effectiveness of our approach. The source code is available at \url{https://github.com/dannis97500/PM-MOE}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00354v1-abstract-full').style.display = 'none'; document.getElementById('2502.00354v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.18922">arXiv:2501.18922</a> <span> [<a href="https://arxiv.org/pdf/2501.18922">pdf</a>, <a href="https://arxiv.org/format/2501.18922">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Databases">cs.DB</span> </div> </div> <p class="title is-5 mathjax"> KBQA-o1: Agentic Knowledge Base Question Answering with Monte Carlo Tree Search </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Luo%2C+H">Haoran Luo</a>, <a href="/search/cs?searchtype=author&query=E%2C+H">Haihong E</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+Y">Yikai Guo</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+Q">Qika Lin</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+X">Xiaobao Wu</a>, <a href="/search/cs?searchtype=author&query=Mu%2C+X">Xinyu Mu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+W">Wenhao Liu</a>, <a href="/search/cs?searchtype=author&query=Song%2C+M">Meina Song</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Y">Yifan Zhu</a>, <a href="/search/cs?searchtype=author&query=Tuan%2C+L+A">Luu Anh Tuan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.18922v1-abstract-short" style="display: inline;"> Knowledge Base Question Answering (KBQA) aims to answer natural language questions with a large-scale structured knowledge base (KB). Despite advancements with large language models (LLMs), KBQA still faces challenges in weak KB awareness, imbalance between effectiveness and efficiency, and high reliance on annotated data. To address these challenges, we propose KBQA-o1, a novel agentic KBQA metho… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18922v1-abstract-full').style.display = 'inline'; document.getElementById('2501.18922v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.18922v1-abstract-full" style="display: none;"> Knowledge Base Question Answering (KBQA) aims to answer natural language questions with a large-scale structured knowledge base (KB). Despite advancements with large language models (LLMs), KBQA still faces challenges in weak KB awareness, imbalance between effectiveness and efficiency, and high reliance on annotated data. To address these challenges, we propose KBQA-o1, a novel agentic KBQA method with Monte Carlo Tree Search (MCTS). It introduces a ReAct-based agent process for stepwise logical form generation with KB environment exploration. Moreover, it employs MCTS, a heuristic search method driven by policy and reward models, to balance agentic exploration's performance and search space. With heuristic exploration, KBQA-o1 generates high-quality annotations for further improvement by incremental fine-tuning. Experimental results show that KBQA-o1 outperforms previous low-resource KBQA methods with limited annotated data, boosting Llama-3.1-8B model's GrailQA F1 performance to 78.5% compared to 48.5% of the previous sota method with GPT-3.5-turbo. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18922v1-abstract-full').style.display = 'none'; document.getElementById('2501.18922v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Preprint</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.18100">arXiv:2501.18100</a> <span> [<a href="https://arxiv.org/pdf/2501.18100">pdf</a>, <a href="https://arxiv.org/format/2501.18100">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Panacea: Mitigating Harmful Fine-tuning for Large Language Models via Post-fine-tuning Perturbation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yibo Wang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+T">Tiansheng Huang</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+L">Li Shen</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+H">Huanjin Yao</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Haotian Luo</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+R">Rui Liu</a>, <a href="/search/cs?searchtype=author&query=Tan%2C+N">Naiqiang Tan</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+J">Jiaxing Huang</a>, <a href="/search/cs?searchtype=author&query=Tao%2C+D">Dacheng Tao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.18100v1-abstract-short" style="display: inline;"> Harmful fine-tuning attack introduces significant security risks to the fine-tuning services. Mainstream defenses aim to vaccinate the model such that the later harmful fine-tuning attack is less effective. However, our evaluation results show that such defenses are fragile -- with a few fine-tuning steps, the model still can learn the harmful knowledge. To this end, we do further experiment and f… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18100v1-abstract-full').style.display = 'inline'; document.getElementById('2501.18100v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.18100v1-abstract-full" style="display: none;"> Harmful fine-tuning attack introduces significant security risks to the fine-tuning services. Mainstream defenses aim to vaccinate the model such that the later harmful fine-tuning attack is less effective. However, our evaluation results show that such defenses are fragile -- with a few fine-tuning steps, the model still can learn the harmful knowledge. To this end, we do further experiment and find that an embarrassingly simple solution -- adding purely random perturbations to the fine-tuned model, can recover the model from harmful behavior, though it leads to a degradation in the model's fine-tuning performance. To address the degradation of fine-tuning performance, we further propose Panacea, which optimizes an adaptive perturbation that will be applied to the model after fine-tuning. Panacea maintains model's safety alignment performance without compromising downstream fine-tuning performance. Comprehensive experiments are conducted on different harmful ratios, fine-tuning tasks and mainstream LLMs, where the average harmful scores are reduced by up-to 21.5%, while maintaining fine-tuning performance. As a by-product, we analyze the optimized perturbation and show that different layers in various LLMs have distinct safety coefficients. Source code available at https://github.com/w-yibo/Panacea <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18100v1-abstract-full').style.display = 'none'; document.getElementById('2501.18100v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.14892">arXiv:2501.14892</a> <span> [<a href="https://arxiv.org/pdf/2501.14892">pdf</a>, <a href="https://arxiv.org/format/2501.14892">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Causal Graphs Meet Thoughts: Enhancing Complex Reasoning in Graph-Augmented LLMs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Luo%2C+H">Hang Luo</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jian Zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+C">Chujun Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.14892v2-abstract-short" style="display: inline;"> In knowledge-intensive tasks, especially in high-stakes domains like medicine and law, it is critical not only to retrieve relevant information but also to provide causal reasoning and explainability. Large language models (LLMs) have achieved remarkable performance in natural language understanding and generation tasks. However, they often suffer from limitations such as difficulty in incorporati… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14892v2-abstract-full').style.display = 'inline'; document.getElementById('2501.14892v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.14892v2-abstract-full" style="display: none;"> In knowledge-intensive tasks, especially in high-stakes domains like medicine and law, it is critical not only to retrieve relevant information but also to provide causal reasoning and explainability. Large language models (LLMs) have achieved remarkable performance in natural language understanding and generation tasks. However, they often suffer from limitations such as difficulty in incorporating new knowledge, generating hallucinations, and explaining their reasoning process. To address these challenges, integrating knowledge graphs with Graph Retrieval-Augmented Generation (Graph RAG) has emerged as an effective solution. Traditional Graph RAG methods often rely on simple graph traversal or semantic similarity, which do not capture causal relationships or align well with the model's internal reasoning steps. This paper proposes a novel pipeline that filters large knowledge graphs to emphasize cause-effect edges, aligns the retrieval process with the model's chain-of-thought (CoT), and enhances reasoning through multi-stage path improvements. Experiments on medical question-answering tasks show consistent gains, with up to a 10\% absolute improvement across multiple large language models (LLMs). This approach demonstrates the value of combining causal reasoning with stepwise retrieval, leading to more interpretable and logically grounded solutions for complex queries. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14892v2-abstract-full').style.display = 'none'; document.getElementById('2501.14892v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">18 pages, 3 figures, 3 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.12666">arXiv:2501.12666</a> <span> [<a href="https://arxiv.org/pdf/2501.12666">pdf</a>, <a href="https://arxiv.org/format/2501.12666">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Explicit Eigenvalue Regularization Improves Sharpness-Aware Minimization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Luo%2C+H">Haocheng Luo</a>, <a href="/search/cs?searchtype=author&query=Truong%2C+T">Tuan Truong</a>, <a href="/search/cs?searchtype=author&query=Pham%2C+T">Tung Pham</a>, <a href="/search/cs?searchtype=author&query=Harandi%2C+M">Mehrtash Harandi</a>, <a href="/search/cs?searchtype=author&query=Phung%2C+D">Dinh Phung</a>, <a href="/search/cs?searchtype=author&query=Le%2C+T">Trung Le</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.12666v1-abstract-short" style="display: inline;"> Sharpness-Aware Minimization (SAM) has attracted significant attention for its effectiveness in improving generalization across various tasks. However, its underlying principles remain poorly understood. In this work, we analyze SAM's training dynamics using the maximum eigenvalue of the Hessian as a measure of sharpness, and propose a third-order stochastic differential equation (SDE), which reve… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.12666v1-abstract-full').style.display = 'inline'; document.getElementById('2501.12666v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.12666v1-abstract-full" style="display: none;"> Sharpness-Aware Minimization (SAM) has attracted significant attention for its effectiveness in improving generalization across various tasks. However, its underlying principles remain poorly understood. In this work, we analyze SAM's training dynamics using the maximum eigenvalue of the Hessian as a measure of sharpness, and propose a third-order stochastic differential equation (SDE), which reveals that the dynamics are driven by a complex mixture of second- and third-order terms. We show that alignment between the perturbation vector and the top eigenvector is crucial for SAM's effectiveness in regularizing sharpness, but find that this alignment is often inadequate in practice, limiting SAM's efficiency. Building on these insights, we introduce Eigen-SAM, an algorithm that explicitly aims to regularize the top Hessian eigenvalue by aligning the perturbation vector with the leading eigenvector. We validate the effectiveness of our theory and the practical advantages of our proposed approach through comprehensive experiments. Code is available at https://github.com/RitianLuo/EigenSAM. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.12666v1-abstract-full').style.display = 'none'; document.getElementById('2501.12666v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.12570">arXiv:2501.12570</a> <span> [<a href="https://arxiv.org/pdf/2501.12570">pdf</a>, <a href="https://arxiv.org/format/2501.12570">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> O1-Pruner: Length-Harmonizing Fine-Tuning for O1-Like Reasoning Pruning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Luo%2C+H">Haotian Luo</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+L">Li Shen</a>, <a href="/search/cs?searchtype=author&query=He%2C+H">Haiying He</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yibo Wang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+S">Shiwei Liu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+W">Wei Li</a>, <a href="/search/cs?searchtype=author&query=Tan%2C+N">Naiqiang Tan</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+X">Xiaochun Cao</a>, <a href="/search/cs?searchtype=author&query=Tao%2C+D">Dacheng Tao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.12570v2-abstract-short" style="display: inline;"> Recently, long-thought reasoning LLMs, such as OpenAI's O1, adopt extended reasoning processes similar to how humans ponder over complex problems. This reasoning paradigm significantly enhances the model's problem-solving abilities and has achieved promising results. However, long-thought reasoning process leads to a substantial increase in inference time. A pressing challenge is reducing the infe… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.12570v2-abstract-full').style.display = 'inline'; document.getElementById('2501.12570v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.12570v2-abstract-full" style="display: none;"> Recently, long-thought reasoning LLMs, such as OpenAI's O1, adopt extended reasoning processes similar to how humans ponder over complex problems. This reasoning paradigm significantly enhances the model's problem-solving abilities and has achieved promising results. However, long-thought reasoning process leads to a substantial increase in inference time. A pressing challenge is reducing the inference overhead of long-thought LLMs while ensuring accuracy. In this paper, we experimentally demonstrate that long-thought reasoning models struggle to effectively allocate token budgets based on problem difficulty and reasoning redundancies. To address this, we propose Length-Harmonizing Fine-Tuning (O1-Pruner), aiming at minimizing reasoning overhead while maintaining accuracy. This effective fine-tuning method first estimates the LLM's baseline performance through pre-sampling and then uses RL-style fine-tuning to encourage the model to generate shorter reasoning processes under accuracy constraints. This allows the model to achieve efficient reasoning with lower redundancy while maintaining accuracy. Experiments on various mathematical reasoning benchmarks show that O1-Pruner not only significantly reduces inference overhead but also achieves higher accuracy, providing a novel and promising solution to this challenge. Our code is coming soon at https://github.com/StarDewXXX/O1-Pruner <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.12570v2-abstract-full').style.display = 'none'; document.getElementById('2501.12570v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages, 4 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.10935">arXiv:2501.10935</a> <span> [<a href="https://arxiv.org/pdf/2501.10935">pdf</a>, <a href="https://arxiv.org/format/2501.10935">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> TSVC:Tripartite Learning with Semantic Variation Consistency for Robust Image-Text Retrieval </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lyu%2C+S">Shuai Lyu</a>, <a href="/search/cs?searchtype=author&query=Tian%2C+Z">Zijing Tian</a>, <a href="/search/cs?searchtype=author&query=Ou%2C+Z">Zhonghong Ou</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Y">Yifan Zhu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xiao Zhang</a>, <a href="/search/cs?searchtype=author&query=Ha%2C+Q">Qiankun Ha</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Haoran Luo</a>, <a href="/search/cs?searchtype=author&query=Song%2C+M">Meina Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.10935v1-abstract-short" style="display: inline;"> Cross-modal retrieval maps data under different modality via semantic relevance. Existing approaches implicitly assume that data pairs are well-aligned and ignore the widely existing annotation noise, i.e., noisy correspondence (NC). Consequently, it inevitably causes performance degradation. Despite attempts that employ the co-teaching paradigm with identical architectures to provide distinct dat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.10935v1-abstract-full').style.display = 'inline'; document.getElementById('2501.10935v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.10935v1-abstract-full" style="display: none;"> Cross-modal retrieval maps data under different modality via semantic relevance. Existing approaches implicitly assume that data pairs are well-aligned and ignore the widely existing annotation noise, i.e., noisy correspondence (NC). Consequently, it inevitably causes performance degradation. Despite attempts that employ the co-teaching paradigm with identical architectures to provide distinct data perspectives, the differences between these architectures are primarily stemmed from random initialization. Thus, the model becomes increasingly homogeneous along with the training process. Consequently, the additional information brought by this paradigm is severely limited. In order to resolve this problem, we introduce a Tripartite learning with Semantic Variation Consistency (TSVC) for robust image-text retrieval. We design a tripartite cooperative learning mechanism comprising a Coordinator, a Master, and an Assistant model. The Coordinator distributes data, and the Assistant model supports the Master model's noisy label prediction with diverse data. Moreover, we introduce a soft label estimation method based on mutual information variation, which quantifies the noise in new samples and assigns corresponding soft labels. We also present a new loss function to enhance robustness and optimize training effectiveness. Extensive experiments on three widely used datasets demonstrate that, even at increasing noise ratios, TSVC exhibits significant advantages in retrieval accuracy and maintains stable training performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.10935v1-abstract-full').style.display = 'none'; document.getElementById('2501.10935v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper has been accepted to the Main Track of AAAI 2025. It contains 9 pages, 7 figures, and is relevant to the areas of cross-modal retrieval and machine learning. The work presents a novel approach in robust image-text retrieval using a tripartite learning framework</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.10131">arXiv:2501.10131</a> <span> [<a href="https://arxiv.org/pdf/2501.10131">pdf</a>, <a href="https://arxiv.org/format/2501.10131">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> ACE: Anatomically Consistent Embeddings in Composition and Decomposition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhou%2C+Z">Ziyu Zhou</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Haozhe Luo</a>, <a href="/search/cs?searchtype=author&query=Taher%2C+M+R+H">Mohammad Reza Hosseinzadeh Taher</a>, <a href="/search/cs?searchtype=author&query=Pang%2C+J">Jiaxuan Pang</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+X">Xiaowei Ding</a>, <a href="/search/cs?searchtype=author&query=Gotway%2C+M">Michael Gotway</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+J">Jianming Liang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.10131v1-abstract-short" style="display: inline;"> Medical images acquired from standardized protocols show consistent macroscopic or microscopic anatomical structures, and these structures consist of composable/decomposable organs and tissues, but existing self-supervised learning (SSL) methods do not appreciate such composable/decomposable structure attributes inherent to medical images. To overcome this limitation, this paper introduces a novel… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.10131v1-abstract-full').style.display = 'inline'; document.getElementById('2501.10131v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.10131v1-abstract-full" style="display: none;"> Medical images acquired from standardized protocols show consistent macroscopic or microscopic anatomical structures, and these structures consist of composable/decomposable organs and tissues, but existing self-supervised learning (SSL) methods do not appreciate such composable/decomposable structure attributes inherent to medical images. To overcome this limitation, this paper introduces a novel SSL approach called ACE to learn anatomically consistent embedding via composition and decomposition with two key branches: (1) global consistency, capturing discriminative macro-structures via extracting global features; (2) local consistency, learning fine-grained anatomical details from composable/decomposable patch features via corresponding matrix matching. Experimental results across 6 datasets 2 backbones, evaluated in few-shot learning, fine-tuning, and property analysis, show ACE's superior robustness, transferability, and clinical potential. The innovations of our ACE lie in grid-wise image cropping, leveraging the intrinsic properties of compositionality and decompositionality of medical images, bridging the semantic gap from high-level pathologies to low-level tissue anomalies, and providing a new SSL method for medical imaging. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.10131v1-abstract-full').style.display = 'none'; document.getElementById('2501.10131v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by WACV 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.06282">arXiv:2501.06282</a> <span> [<a href="https://arxiv.org/pdf/2501.06282">pdf</a>, <a href="https://arxiv.org/format/2501.06282">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> MinMo: A Multimodal Large Language Model for Seamless Voice Interaction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+Q">Qian Chen</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yafeng Chen</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yanni Chen</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+M">Mengzhe Chen</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yingda Chen</a>, <a href="/search/cs?searchtype=author&query=Deng%2C+C">Chong Deng</a>, <a href="/search/cs?searchtype=author&query=Du%2C+Z">Zhihao Du</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+R">Ruize Gao</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+C">Changfeng Gao</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+Z">Zhifu Gao</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yabin Li</a>, <a href="/search/cs?searchtype=author&query=Lv%2C+X">Xiang Lv</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jiaqing Liu</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Haoneng Luo</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+B">Bin Ma</a>, <a href="/search/cs?searchtype=author&query=Ni%2C+C">Chongjia Ni</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xian Shi</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+J">Jialong Tang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Hui Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Hao Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+W">Wen Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yuxuan Wang</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Y">Yunlan Xu</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+F">Fan Yu</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+Z">Zhijie Yan</a> , et al. (11 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.06282v1-abstract-short" style="display: inline;"> Recent advancements in large language models (LLMs) and multimodal speech-text models have laid the groundwork for seamless voice interactions, enabling real-time, natural, and human-like conversations. Previous models for voice interactions are categorized as native and aligned. Native models integrate speech and text processing in one framework but struggle with issues like differing sequence le… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.06282v1-abstract-full').style.display = 'inline'; document.getElementById('2501.06282v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.06282v1-abstract-full" style="display: none;"> Recent advancements in large language models (LLMs) and multimodal speech-text models have laid the groundwork for seamless voice interactions, enabling real-time, natural, and human-like conversations. Previous models for voice interactions are categorized as native and aligned. Native models integrate speech and text processing in one framework but struggle with issues like differing sequence lengths and insufficient pre-training. Aligned models maintain text LLM capabilities but are often limited by small datasets and a narrow focus on speech tasks. In this work, we introduce MinMo, a Multimodal Large Language Model with approximately 8B parameters for seamless voice interaction. We address the main limitations of prior aligned multimodal models. We train MinMo through multiple stages of speech-to-text alignment, text-to-speech alignment, speech-to-speech alignment, and duplex interaction alignment, on 1.4 million hours of diverse speech data and a broad range of speech tasks. After the multi-stage training, MinMo achieves state-of-the-art performance across various benchmarks for voice comprehension and generation while maintaining the capabilities of text LLMs, and also facilitates full-duplex conversation, that is, simultaneous two-way communication between the user and the system. Moreover, we propose a novel and simple voice decoder that outperforms prior models in voice generation. The enhanced instruction-following capabilities of MinMo supports controlling speech generation based on user instructions, with various nuances including emotions, dialects, and speaking rates, and mimicking specific voices. For MinMo, the speech-to-text latency is approximately 100ms, full-duplex latency is approximately 600ms in theory and 800ms in practice. The MinMo project web page is https://funaudiollm.github.io/minmo, and the code and models will be released soon. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.06282v1-abstract-full').style.display = 'none'; document.getElementById('2501.06282v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Work in progress. Authors are listed in alphabetical order by family name</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.02749">arXiv:2501.02749</a> <span> [<a href="https://arxiv.org/pdf/2501.02749">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Intelligent logistics management robot path planning algorithm integrating transformer and GCN network </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Luo%2C+H">Hao Luo</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+J">Jianjun Wei</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+S">Shuchen Zhao</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+A">Ankai Liang</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Z">Zhongjin Xu</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+R">Ruxue Jiang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.02749v2-abstract-short" style="display: inline;"> This research delves into advanced route optimization for robots in smart logistics, leveraging a fusion of Transformer architectures, Graph Neural Networks (GNNs), and Generative Adversarial Networks (GANs). The approach utilizes a graph-based representation encompassing geographical data, cargo allocation, and robot dynamics, addressing both spatial and resource limitations to refine route effic… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.02749v2-abstract-full').style.display = 'inline'; document.getElementById('2501.02749v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.02749v2-abstract-full" style="display: none;"> This research delves into advanced route optimization for robots in smart logistics, leveraging a fusion of Transformer architectures, Graph Neural Networks (GNNs), and Generative Adversarial Networks (GANs). The approach utilizes a graph-based representation encompassing geographical data, cargo allocation, and robot dynamics, addressing both spatial and resource limitations to refine route efficiency. Through extensive testing with authentic logistics datasets, the proposed method achieves notable improvements, including a 15% reduction in travel distance, a 20% boost in time efficiency, and a 10% decrease in energy consumption. These findings highlight the algorithm's effectiveness, promoting enhanced performance in intelligent logistics operations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.02749v2-abstract-full').style.display = 'none'; document.getElementById('2501.02749v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 5 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">21 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.01427">arXiv:2501.01427</a> <span> [<a href="https://arxiv.org/pdf/2501.01427">pdf</a>, <a href="https://arxiv.org/format/2501.01427">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> VideoAnydoor: High-fidelity Video Object Insertion with Precise Motion Control </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tu%2C+Y">Yuanpeng Tu</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Hao Luo</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xi Chen</a>, <a href="/search/cs?searchtype=author&query=Ji%2C+S">Sihui Ji</a>, <a href="/search/cs?searchtype=author&query=Bai%2C+X">Xiang Bai</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+H">Hengshuang Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.01427v3-abstract-short" style="display: inline;"> Despite significant advancements in video generation, inserting a given object into videos remains a challenging task. The difficulty lies in preserving the appearance details of the reference object and accurately modeling coherent motions at the same time. In this paper, we propose VideoAnydoor, a zero-shot video object insertion framework with high-fidelity detail preservation and precise motio… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.01427v3-abstract-full').style.display = 'inline'; document.getElementById('2501.01427v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.01427v3-abstract-full" style="display: none;"> Despite significant advancements in video generation, inserting a given object into videos remains a challenging task. The difficulty lies in preserving the appearance details of the reference object and accurately modeling coherent motions at the same time. In this paper, we propose VideoAnydoor, a zero-shot video object insertion framework with high-fidelity detail preservation and precise motion control. Starting from a text-to-video model, we utilize an ID extractor to inject the global identity and leverage a box sequence to control the overall motion. To preserve the detailed appearance and meanwhile support fine-grained motion control, we design a pixel warper. It takes the reference image with arbitrary key-points and the corresponding key-point trajectories as inputs. It warps the pixel details according to the trajectories and fuses the warped features with the diffusion U-Net, thus improving detail preservation and supporting users in manipulating the motion trajectories. In addition, we propose a training strategy involving both videos and static images with a weighted loss to enhance insertion quality. VideoAnydoor demonstrates significant superiority over existing methods and naturally supports various downstream applications (e.g., talking head generation, video virtual try-on, multi-region editing) without task-specific fine-tuning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.01427v3-abstract-full').style.display = 'none'; document.getElementById('2501.01427v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 2 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Project page: https://videoanydoor.github.io/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.01425">arXiv:2501.01425</a> <span> [<a href="https://arxiv.org/pdf/2501.01425">pdf</a>, <a href="https://arxiv.org/format/2501.01425">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Free-Form Motion Control: A Synthetic Video Generation Dataset with Controllable Camera and Object Motions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shuai%2C+X">Xincheng Shuai</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+H">Henghui Ding</a>, <a href="/search/cs?searchtype=author&query=Qin%2C+Z">Zhenyuan Qin</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Hao Luo</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+X">Xingjun Ma</a>, <a href="/search/cs?searchtype=author&query=Tao%2C+D">Dacheng Tao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.01425v2-abstract-short" style="display: inline;"> Controlling the movements of dynamic objects and the camera within generated videos is a meaningful yet challenging task. Due to the lack of datasets with comprehensive motion annotations, existing algorithms can not simultaneously control the motions of both camera and objects, resulting in limited controllability over generated contents. To address this issue and facilitate the research in this… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.01425v2-abstract-full').style.display = 'inline'; document.getElementById('2501.01425v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.01425v2-abstract-full" style="display: none;"> Controlling the movements of dynamic objects and the camera within generated videos is a meaningful yet challenging task. Due to the lack of datasets with comprehensive motion annotations, existing algorithms can not simultaneously control the motions of both camera and objects, resulting in limited controllability over generated contents. To address this issue and facilitate the research in this field, we introduce a Synthetic Dataset for Free-Form Motion Control (SynFMC). The proposed SynFMC dataset includes diverse objects and environments and covers various motion patterns according to specific rules, simulating common and complex real-world scenarios. The complete 6D pose information facilitates models learning to disentangle the motion effects from objects and the camera in a video. To validate the effectiveness and generalization of SynFMC, we further propose a method, Free-Form Motion Control (FMC). FMC enables independent or simultaneous control of object and camera movements, producing high-fidelity videos. Moreover, it is compatible with various personalized text-to-image (T2I) models for different content styles. Extensive experiments demonstrate that the proposed FMC outperforms previous methods across multiple scenarios. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.01425v2-abstract-full').style.display = 'none'; document.getElementById('2501.01425v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 2 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Project Page: https://henghuiding.github.io/SynFMC/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.18588">arXiv:2412.18588</a> <span> [<a href="https://arxiv.org/pdf/2412.18588">pdf</a>, <a href="https://arxiv.org/format/2412.18588">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> A Paragraph is All It Takes: Rich Robot Behaviors from Interacting, Trusted LLMs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=OpenMind"> OpenMind</a>, <a href="/search/cs?searchtype=author&query=Zhong%2C+S">Shaohong Zhong</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+A">Adam Zhou</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+B">Boyuan Chen</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Homin Luo</a>, <a href="/search/cs?searchtype=author&query=Liphardt%2C+J">Jan Liphardt</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.18588v1-abstract-short" style="display: inline;"> Large Language Models (LLMs) are compact representations of all public knowledge of our physical environment and animal and human behaviors. The application of LLMs to robotics may offer a path to highly capable robots that perform well across most human tasks with limited or even zero tuning. Aside from increasingly sophisticated reasoning and task planning, networks of (suitably designed) LLMs o… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.18588v1-abstract-full').style.display = 'inline'; document.getElementById('2412.18588v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.18588v1-abstract-full" style="display: none;"> Large Language Models (LLMs) are compact representations of all public knowledge of our physical environment and animal and human behaviors. The application of LLMs to robotics may offer a path to highly capable robots that perform well across most human tasks with limited or even zero tuning. Aside from increasingly sophisticated reasoning and task planning, networks of (suitably designed) LLMs offer ease of upgrading capabilities and allow humans to directly observe the robot's thinking. Here we explore the advantages, limitations, and particularities of using LLMs to control physical robots. The basic system consists of four LLMs communicating via a human language data bus implemented via web sockets and ROS2 message passing. Surprisingly, rich robot behaviors and good performance across different tasks could be achieved despite the robot's data fusion cycle running at only 1Hz and the central data bus running at the extremely limited rates of the human brain, of around 40 bits/s. The use of natural language for inter-LLM communication allowed the robot's reasoning and decision making to be directly observed by humans and made it trivial to bias the system's behavior with sets of rules written in plain English. These rules were immutably written into Ethereum, a global, public, and censorship resistant Turing-complete computer. We suggest that by using natural language as the data bus among interacting AIs, and immutable public ledgers to store behavior constraints, it is possible to build robots that combine unexpectedly rich performance, upgradability, and durable alignment with humans. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.18588v1-abstract-full').style.display = 'none'; document.getElementById('2412.18588v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 1 figure</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.18292">arXiv:2412.18292</a> <span> [<a href="https://arxiv.org/pdf/2412.18292">pdf</a>, <a href="https://arxiv.org/format/2412.18292">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Enhancing Multi-Robot Semantic Navigation Through Multimodal Chain-of-Thought Score Collaboration </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shen%2C+Z">Zhixuan Shen</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Haonan Luo</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Kexun Chen</a>, <a href="/search/cs?searchtype=author&query=Lv%2C+F">Fengmao Lv</a>, <a href="/search/cs?searchtype=author&query=Li%2C+T">Tianrui Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.18292v2-abstract-short" style="display: inline;"> Understanding how humans cooperatively utilize semantic knowledge to explore unfamiliar environments and decide on navigation directions is critical for house service multi-robot systems. Previous methods primarily focused on single-robot centralized planning strategies, which severely limited exploration efficiency. Recent research has considered decentralized planning strategies for multiple rob… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.18292v2-abstract-full').style.display = 'inline'; document.getElementById('2412.18292v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.18292v2-abstract-full" style="display: none;"> Understanding how humans cooperatively utilize semantic knowledge to explore unfamiliar environments and decide on navigation directions is critical for house service multi-robot systems. Previous methods primarily focused on single-robot centralized planning strategies, which severely limited exploration efficiency. Recent research has considered decentralized planning strategies for multiple robots, assigning separate planning models to each robot, but these approaches often overlook communication costs. In this work, we propose Multimodal Chain-of-Thought Co-Navigation (MCoCoNav), a modular approach that utilizes multimodal Chain-of-Thought to plan collaborative semantic navigation for multiple robots. MCoCoNav combines visual perception with Vision Language Models (VLMs) to evaluate exploration value through probabilistic scoring, thus reducing time costs and achieving stable outputs. Additionally, a global semantic map is used as a communication bridge, minimizing communication overhead while integrating observational results. Guided by scores that reflect exploration trends, robots utilize this map to assess whether to explore new frontier points or revisit history nodes. Experiments on HM3D_v0.2 and MP3D demonstrate the effectiveness of our approach. Our code is available at https://github.com/FrankZxShen/MCoCoNav.git. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.18292v2-abstract-full').style.display = 'none'; document.getElementById('2412.18292v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages, 10 figures, Extended Version of accepted AAAI 2025 Paper</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.17458">arXiv:2412.17458</a> <span> [<a href="https://arxiv.org/pdf/2412.17458">pdf</a>, <a href="https://arxiv.org/format/2412.17458">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TCSVT.2024.3479887">10.1109/TCSVT.2024.3479887 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Progressive Boundary Guided Anomaly Synthesis for Industrial Anomaly Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+Q">Qiyu Chen</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Huiyuan Luo</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+H">Han Gao</a>, <a href="/search/cs?searchtype=author&query=Lv%2C+C">Chengkan Lv</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zhengtao Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.17458v1-abstract-short" style="display: inline;"> Unsupervised anomaly detection methods can identify surface defects in industrial images by leveraging only normal samples for training. Due to the risk of overfitting when learning from a single class, anomaly synthesis strategies are introduced to enhance detection capability by generating artificial anomalies. However, existing strategies heavily rely on anomalous textures from auxiliary datase… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.17458v1-abstract-full').style.display = 'inline'; document.getElementById('2412.17458v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.17458v1-abstract-full" style="display: none;"> Unsupervised anomaly detection methods can identify surface defects in industrial images by leveraging only normal samples for training. Due to the risk of overfitting when learning from a single class, anomaly synthesis strategies are introduced to enhance detection capability by generating artificial anomalies. However, existing strategies heavily rely on anomalous textures from auxiliary datasets. Moreover, their limitations in the coverage and directionality of anomaly synthesis may result in a failure to capture useful information and lead to significant redundancy. To address these issues, we propose a novel Progressive Boundary-guided Anomaly Synthesis (PBAS) strategy, which can directionally synthesize crucial feature-level anomalies without auxiliary textures. It consists of three core components: Approximate Boundary Learning (ABL), Anomaly Feature Synthesis (AFS), and Refined Boundary Optimization (RBO). To make the distribution of normal samples more compact, ABL first learns an approximate decision boundary by center constraint, which improves the center initialization through feature alignment. AFS then directionally synthesizes anomalies with more flexible scales guided by the hypersphere distribution of normal features. Since the boundary is so loose that it may contain real anomalies, RBO refines the decision boundary through the binary classification of artificial anomalies and normal features. Experimental results show that our method achieves state-of-the-art performance and the fastest detection speed on three widely used industrial datasets, including MVTec AD, VisA, and MPDD. The code will be available at: https://github.com/cqylunlun/PBAS. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.17458v1-abstract-full').style.display = 'none'; document.getElementById('2412.17458v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by IEEE Transactions on Circuits and Systems for Video Technology</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.15130">arXiv:2412.15130</a> <span> [<a href="https://arxiv.org/pdf/2412.15130">pdf</a>, <a href="https://arxiv.org/ps/2412.15130">ps</a>, <a href="https://arxiv.org/format/2412.15130">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computational Geometry">cs.CG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Complexity">cs.CC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Discrete Mathematics">cs.DM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> </div> </div> <p class="title is-5 mathjax"> Continuous Flattening and Reversing of Convex Polyhedral Linkages </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Demaine%2C+E+D">Erik D. Demaine</a>, <a href="/search/cs?searchtype=author&query=Demaine%2C+M+L">Martin L. Demaine</a>, <a href="/search/cs?searchtype=author&query=Hecher%2C+M">Markus Hecher</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+R">Rebecca Lin</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+V+H">Victor H. Luo</a>, <a href="/search/cs?searchtype=author&query=Nara%2C+C">Chie Nara</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.15130v1-abstract-short" style="display: inline;"> We prove two results about transforming any convex polyhedron, modeled as a linkage L of its edges. First, if we subdivide each edge of L in half, then L can be continuously flattened into a plane. Second, if L is equilateral and we again subdivide each edge in half, then L can be reversed, i.e., turned inside-out. A linear number of subdivisions is optimal up to constant factors, as we show (none… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.15130v1-abstract-full').style.display = 'inline'; document.getElementById('2412.15130v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.15130v1-abstract-full" style="display: none;"> We prove two results about transforming any convex polyhedron, modeled as a linkage L of its edges. First, if we subdivide each edge of L in half, then L can be continuously flattened into a plane. Second, if L is equilateral and we again subdivide each edge in half, then L can be reversed, i.e., turned inside-out. A linear number of subdivisions is optimal up to constant factors, as we show (nonequilateral) examples that require a linear number of subdivisions. For nonequilateral linkages, we show that more subdivisions can be required: even a tetrahedron can require an arbitrary number of subdivisions to reverse. For nonequilateral tetrahedra, we provide an algorithm that matches this lower bound up to constant factors: logarithmic in the aspect ratio. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.15130v1-abstract-full').style.display = 'none'; document.getElementById('2412.15130v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 68R10; 68Q17; 68U05 <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> G.2.2; F.2.2; I.3.5 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.14168">arXiv:2412.14168</a> <span> [<a href="https://arxiv.org/pdf/2412.14168">pdf</a>, <a href="https://arxiv.org/format/2412.14168">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> FashionComposer: Compositional Fashion Image Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ji%2C+S">Sihui Ji</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yiyang Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xi Chen</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+X">Xiaogang Xu</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Hao Luo</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+H">Hengshuang Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.14168v2-abstract-short" style="display: inline;"> We present FashionComposer for compositional fashion image generation. Unlike previous methods, FashionComposer is highly flexible. It takes multi-modal input (i.e., text prompt, parametric human model, garment image, and face image) and supports personalizing the appearance, pose, and figure of the human and assigning multiple garments in one pass. To achieve this, we first develop a universal fr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.14168v2-abstract-full').style.display = 'inline'; document.getElementById('2412.14168v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.14168v2-abstract-full" style="display: none;"> We present FashionComposer for compositional fashion image generation. Unlike previous methods, FashionComposer is highly flexible. It takes multi-modal input (i.e., text prompt, parametric human model, garment image, and face image) and supports personalizing the appearance, pose, and figure of the human and assigning multiple garments in one pass. To achieve this, we first develop a universal framework capable of handling diverse input modalities. We construct scaled training data to enhance the model's robust compositional capabilities. To accommodate multiple reference images (garments and faces) seamlessly, we organize these references in a single image as an "asset library" and employ a reference UNet to extract appearance features. To inject the appearance features into the correct pixels in the generated result, we propose subject-binding attention. It binds the appearance features from different "assets" with the corresponding text features. In this way, the model could understand each asset according to their semantics, supporting arbitrary numbers and types of reference images. As a comprehensive solution, FashionComposer also supports many other applications like human album generation, diverse virtual try-on tasks, etc. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.14168v2-abstract-full').style.display = 'none'; document.getElementById('2412.14168v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">https://sihuiji.github.io/FashionComposer-Page</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Luo%2C+H&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Luo%2C+H&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Luo%2C+H&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Luo%2C+H&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Luo%2C+H&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Luo%2C+H&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">…</span></li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>