Search | arXiv e-print repository
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 2,326 results for author: <span class="mathjax">Zhang, R</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Zhang%2C+R">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Zhang, R"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Zhang%2C+R&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Zhang, R"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Zhang%2C+R&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Zhang%2C+R&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Zhang%2C+R&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Zhang%2C+R&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Zhang%2C+R&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Zhang%2C+R&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">…</span></li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.19848">arXiv:2502.19848</a> <span> [<a href="https://arxiv.org/pdf/2502.19848">pdf</a>, <a href="https://arxiv.org/format/2502.19848">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> One-for-More: Continual Diffusion Model for Anomaly Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+X">Xiaofan Li</a>, <a href="/search/cs?searchtype=author&query=Tan%2C+X">Xin Tan</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Z">Zhuo Chen</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zhizhong Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Ruixin Zhang</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+R">Rizen Guo</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+G">Guanna Jiang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yulong Chen</a>, <a href="/search/cs?searchtype=author&query=Qu%2C+Y">Yanyun Qu</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+L">Lizhuang Ma</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+Y">Yuan Xie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.19848v1-abstract-short" style="display: inline;"> With the rise of generative models, there is a growing interest in unifying all tasks within a generative framework. 
Anomaly detection methods also fall into this scope and utilize diffusion models to generate or reconstruct normal samples when given arbitrary anomaly images. However, our study found that the diffusion model suffers from severe ``faithfulness hallucination'' and ``catastrophic for… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.19848v1-abstract-full').style.display = 'inline'; document.getElementById('2502.19848v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.19848v1-abstract-full" style="display: none;"> With the rise of generative models, there is a growing interest in unifying all tasks within a generative framework. Anomaly detection methods also fall into this scope and utilize diffusion models to generate or reconstruct normal samples when given arbitrary anomaly images. However, our study found that the diffusion model suffers from severe ``faithfulness hallucination'' and ``catastrophic forgetting'', which can't meet the unpredictable pattern increments. To mitigate the above problems, we propose a continual diffusion model that uses gradient projection to achieve stable continual learning. Gradient projection deploys a regularization on the model updating by modifying the gradient towards the direction protecting the learned knowledge. But as a double-edged sword, it also requires huge memory costs brought by the Markov process. Hence, we propose an iterative singular value decomposition method based on the transitive property of linear representation, which consumes tiny memory and incurs almost no performance loss. Finally, considering the risk of ``over-fitting'' to normal images of the diffusion model, we propose an anomaly-masked network to enhance the condition mechanism of the diffusion model. For continual anomaly detection, ours achieves first place in 17/18 settings on MVTec and VisA. Code is available at https://github.com/FuNz-0/One-for-More <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.19848v1-abstract-full').style.display = 'none'; document.getElementById('2502.19848v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by CVPR2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.19822">arXiv:2502.19822</a> <span> [<a href="https://arxiv.org/pdf/2502.19822">pdf</a>, <a href="https://arxiv.org/format/2502.19822">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3706599.3719736">10.1145/3706599.3719736 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Empowering Social Service with AI: Insights from a Participatory Design Study with Practitioners </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tan%2C+Y">Yugin Tan</a>, <a href="/search/cs?searchtype=author&query=Soh%2C+K+X">Kai Xin Soh</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Renwen Zhang</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+J">Jungup Lee</a>, <a href="/search/cs?searchtype=author&query=Meng%2C+H">Han Meng</a>, <a href="/search/cs?searchtype=author&query=Sen%2C+B">Biswadeep Sen</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+Y">Yi-Chieh Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.19822v1-abstract-short" style="display: inline;"> In social service, administrative burdens and decision-making challenges often hinder practitioners from performing effective casework. Generative AI (GenAI) offers significant potential to streamline these tasks, yet exacerbates concerns about overreliance, algorithmic bias, and loss of identity within the profession. We explore these issues through a two-stage participatory design study. We cond… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.19822v1-abstract-full').style.display = 'inline'; document.getElementById('2502.19822v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.19822v1-abstract-full" style="display: none;"> In social service, administrative burdens and decision-making challenges often hinder practitioners from performing effective casework. Generative AI (GenAI) offers significant potential to streamline these tasks, yet exacerbates concerns about overreliance, algorithmic bias, and loss of identity within the profession. We explore these issues through a two-stage participatory design study. We conducted formative co-design workshops (\textit{n=27}) to create a prototype GenAI tool, followed by contextual inquiry sessions with practitioners (\textit{n=24}) using the tool with real case data. We reveal opportunities for AI integration in documentation, assessment, and worker supervision, while highlighting risks related to GenAI limitations, skill retention, and client safety. 
2. arXiv:2502.19822 (https://arxiv.org/abs/2502.19822) [pdf, other] - cs.HC (Human-Computer Interaction)
   DOI: 10.1145/3706599.3719736 (https://doi.org/10.1145/3706599.3719736)
   Title: Empowering Social Service with AI: Insights from a Participatory Design Study with Practitioners
   Authors: Yugin Tan, Kai Xin Soh, Renwen Zhang, Jungup Lee, Han Meng, Biswadeep Sen, Yi-Chieh Lee
   Abstract: In social service, administrative burdens and decision-making challenges often hinder practitioners from performing effective casework. Generative AI (GenAI) offers significant potential to streamline these tasks, yet exacerbates concerns about overreliance, algorithmic bias, and loss of identity within the profession. We explore these issues through a two-stage participatory design study. We conducted formative co-design workshops (n=27) to create a prototype GenAI tool, followed by contextual inquiry sessions with practitioners (n=24) using the tool with real case data. We reveal opportunities for AI integration in documentation, assessment, and worker supervision, while highlighting risks related to GenAI limitations, skill retention, and client safety. Drawing comparisons with GenAI tools in other fields, we discuss design and usage guidelines for such tools in social service practice.
   Submitted 27 February, 2025; originally announced February 2025.

3. arXiv:2502.18808 (https://arxiv.org/abs/2502.18808) [pdf, other] - cs.LG (Machine Learning); stat.ML (Machine Learning)
   Title: Optimal Stochastic Trace Estimation in Generative Modeling
   Authors: Xinyang Liu, Hengrong Du, Wei Deng, Ruqi Zhang
   Abstract: Hutchinson estimators are widely employed in training divergence-based likelihoods for diffusion models to ensure optimal transport (OT) properties. However, this estimator often suffers from high variance and scalability concerns. To address these challenges, we investigate Hutch++, an optimal stochastic trace estimator for generative models, designed to minimize training variance while maintaining transport optimality. Hutch++ is particularly effective for handling ill-conditioned matrices with large condition numbers, which commonly arise when high-dimensional data exhibits a low-dimensional structure. To mitigate the need for frequent and costly QR decompositions, we propose practical schemes that balance frequency and accuracy, backed by theoretical guarantees. Our analysis demonstrates that Hutch++ leads to generations of higher quality. Furthermore, this method exhibits effective variance reduction in various applications, including simulations, conditional time series forecasts, and image generation.
   Submitted 25 February, 2025; originally announced February 2025.
   Comments: Accepted by AISTATS 2025
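Hutch++ is a published trace estimator (Meyer et al., 2021), so a compact reference sketch can make the abstract concrete. The NumPy code below contrasts plain Hutchinson with Hutch++ for a matrix accessed only through matrix-vector products; the probe budget, the toy test matrix, and the `matvec` closure are illustrative assumptions, and the paper's schemes for reducing QR frequency inside diffusion-model training are not shown.

```python
import numpy as np

def hutchinson(matvec, n, m, rng):
    """Plain Hutchinson estimate of tr(A) using m Rademacher probes."""
    G = rng.choice([-1.0, 1.0], size=(n, m))
    return np.trace(G.T @ matvec(G)) / m

def hutch_pp(matvec, n, m, rng):
    """Hutch++: spend part of the budget on a rough range sketch Q of A,
    take tr(Q^T A Q) exactly, and apply Hutchinson only to the residual
    (I - QQ^T) A (I - QQ^T), whose trace is small for near-low-rank A."""
    k = m // 3
    S = rng.choice([-1.0, 1.0], size=(n, k))
    Q, _ = np.linalg.qr(matvec(S))             # orthonormal basis of a sketch of range(A)
    G = rng.choice([-1.0, 1.0], size=(n, k))
    G = G - Q @ (Q.T @ G)                      # deflate the captured subspace
    return np.trace(Q.T @ matvec(Q)) + np.trace(G.T @ matvec(G)) / k

rng = np.random.default_rng(0)
A = np.diag(np.linspace(1.0, 100.0, 200))      # ill-conditioned toy matrix
mv = lambda X: A @ X
print(np.trace(A), hutchinson(mv, 200, 60, rng), hutch_pp(mv, 200, 60, rng))
```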
4. arXiv:2502.18435 (https://arxiv.org/abs/2502.18435) [pdf, other] - cs.CL (Computation and Language); cs.IT (Information Theory); cs.LG (Machine Learning)
   Title: Reversal Blessing: Thinking Backward May Outpace Thinking Forward in Multi-choice Questions
   Authors: Yizhe Zhang, Richard Bai, Zijin Gu, Ruixiang Zhang, Jiatao Gu, Emmanuel Abbe, Samy Bengio, Navdeep Jaitly
   Abstract: Language models usually use left-to-right (L2R) autoregressive factorization. However, L2R factorization may not always be the best inductive bias. Therefore, we investigate whether alternative factorizations of the text distribution could be beneficial in some tasks. We investigate right-to-left (R2L) training as a compelling alternative, focusing on multiple-choice questions (MCQs) as a test bed for knowledge extraction and reasoning. Through extensive experiments across various model sizes (2B-8B parameters) and training datasets, we find that R2L models can significantly outperform L2R models on several MCQ benchmarks, including logical reasoning, commonsense understanding, and truthfulness assessment tasks. Our analysis reveals that this performance difference may be fundamentally linked to multiple factors including calibration, computability and directional conditional entropy. We ablate the impact of these factors through controlled simulation studies using arithmetic tasks, where the impacting factors can be better disentangled. Our work demonstrates that exploring alternative factorizations of the text distribution can lead to improvements in LLM capabilities and provides theoretical insights into optimal factorization towards approximating human language distribution, and when each reasoning order might be more advantageous.
   Submitted 25 February, 2025; originally announced February 2025.
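Right-to-left (R2L) training in the sense used above amounts to fitting an ordinary autoregressive model on sequences whose token order has been reversed, so the text distribution is factorized from the last token backward. A minimal sketch of that data-preparation step, with made-up token ids:

```python
def to_r2l(token_ids):
    """An R2L language model is a standard autoregressive model trained on
    sequences whose token order is reversed, so it factorizes the text
    distribution as p(x_n) p(x_{n-1} | x_n) ... p(x_1 | x_2, ..., x_n)."""
    return list(reversed(token_ids))

# L2R scoring of an MCQ choice:               p(choice | question)
# R2L scoring on the reversed text: roughly   p(question | choice), i.e. "thinking backward".
print(to_r2l([101, 7, 42, 9]))   # -> [9, 42, 7, 101]
```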
5. arXiv:2502.18431 (https://arxiv.org/abs/2502.18431) [pdf, other] - cs.CL (Computation and Language); cs.AI (Artificial Intelligence)
   Title: TextGames: Learning to Self-Play Text-Based Puzzle Games via Language Model Reasoning
   Authors: Frederikus Hudi, Genta Indra Winata, Ruochen Zhang, Alham Fikri Aji
   Abstract: Reasoning is a fundamental capability of large language models (LLMs), enabling them to comprehend, analyze, and solve complex problems. In this paper, we introduce TextGames, an innovative benchmark specifically crafted to assess LLMs through demanding text-based games that require advanced skills in pattern recognition, spatial awareness, arithmetic, and logical reasoning. Our analysis probes LLMs' performance in both single-turn and multi-turn reasoning, and their abilities in leveraging feedback to correct subsequent answers through self-reflection. Our findings reveal that, although LLMs exhibit proficiency in addressing most easy and medium-level problems, they face significant challenges with more difficult tasks. In contrast, humans are capable of solving all tasks when given sufficient time. Moreover, we observe that LLMs show improved performance in multi-turn predictions through self-reflection, yet they still struggle with sequencing, counting, and following complex rules consistently. Additionally, models optimized for reasoning outperform pre-trained LLMs that prioritize instruction following, highlighting the crucial role of reasoning skills in addressing highly complex problems.
   Submitted 25 February, 2025; originally announced February 2025.

6. arXiv:2502.18300 (https://arxiv.org/abs/2502.18300) [pdf, other] - cs.LG (Machine Learning); stat.ML (Machine Learning)
   Title: Bayesian Computation in Deep Learning
   Authors: Wenlong Chen, Bolian Li, Ruqi Zhang, Yingzhen Li
   Abstract: This review paper is intended for the 2nd edition of the Handbook of Markov chain Monte Carlo. We provide an introduction to approximate inference techniques as Bayesian computation methods applied to deep learning models. We organize the chapter by presenting popular computational methods for Bayesian neural networks and deep generative models, explaining their unique challenges in posterior inference as well as the solutions.
   Submitted 27 February, 2025; v1 submitted 25 February, 2025; originally announced February 2025.
   Comments: 43 pages, 7 figures

7. arXiv:2502.17905 (https://arxiv.org/abs/2502.17905) [pdf, other] - cs.IT (Information Theory); eess.SP (Signal Processing)
   Title: A Tutorial on Movable Antennas for Wireless Networks
   Authors: Lipeng Zhu, Wenyan Ma, Weidong Mei, Yong Zeng, Qingqing Wu, Boyu Ning, Zhenyu Xiao, Xiaodan Shao, Jun Zhang, Rui Zhang
   Abstract: Movable antenna (MA) has been recognized as a promising technology to enhance the performance of wireless communication and sensing by enabling antenna movement. Such a significant paradigm shift from conventional fixed antennas (FAs) to MAs offers tremendous new opportunities towards realizing more versatile, adaptive and efficient next-generation wireless networks such as 6G. In this paper, we provide a comprehensive tutorial on the fundamentals and advancements in the area of MA-empowered wireless networks. First, we overview the historical development and contemporary applications of MA technologies. Next, to characterize the continuous variation in wireless channels with respect to antenna position and/or orientation, we present new field-response channel models tailored for MAs, which are applicable to narrowband and wideband systems as well as far-field and near-field propagation conditions. Subsequently, we review the state-of-the-art architectures for implementing MAs and discuss their practical constraints. A general optimization framework is then formulated to fully exploit the spatial degrees of freedom (DoFs) in antenna movement for performance enhancement in wireless systems. In particular, we delve into two major design issues for MA systems. First, we address the intricate antenna movement optimization problem for various communication and/or sensing systems to maximize the performance gains achievable by MAs. Second, we deal with the challenging channel acquisition issue in MA systems for reconstructing the channel mapping between arbitrary antenna positions inside the transmitter and receiver regions. Moreover, we show existing prototypes developed for MA-aided communication/sensing and the experimental results based on them. Finally, the extension of MA design to other wireless systems and its synergy with other emerging wireless technologies are discussed.
   Submitted 25 February, 2025; originally announced February 2025.
   Comments: Accepted for publication in the IEEE Communications Surveys & Tutorials
8. arXiv:2502.17759 (https://arxiv.org/abs/2502.17759) [pdf] - eess.IV (Image and Video Processing); cs.CV (Computer Vision and Pattern Recognition)
   Title: Label-free Prediction of Vascular Connectivity in Perfused Microvascular Networks in vitro
   Authors: Liang Xu, Pengwu Song, Shilu Zhu, Yang Zhang, Ru Zhang, Zhiyuan Zheng, Qingdong Zhang, Jie Gao, Chen Han, Mingzhai Sun, Peng Yao, Min Ye, Ronald X. Xu
   Abstract: Continuous monitoring and in-situ assessment of microvascular connectivity have significant implications for culturing vascularized organoids and optimizing the therapeutic strategies. However, commonly used methods for vascular connectivity assessment heavily rely on fluorescent labels that may either raise biocompatibility concerns or interrupt the normal cell growth process. To address this issue, a Vessel Connectivity Network (VC-Net) was developed for label-free assessment of vascular connectivity. To validate the VC-Net, microvascular networks (MVNs) were cultured in vitro and their microscopic images were acquired at different culturing conditions as a training dataset. The VC-Net employs a Vessel Queue Contrastive Learning (VQCL) method and a class imbalance algorithm to address the issues of limited sample size, indistinctive class features and imbalanced class distribution in the dataset. The VC-Net successfully evaluated the vascular connectivity with no significant deviation from that by fluorescence imaging. In addition, the proposed VC-Net successfully differentiated the connectivity characteristics between normal and tumor-related MVNs. In comparison with those cultured in the regular microenvironment, the averaged connectivity of MVNs cultured in the tumor-related microenvironment decreased by 30.8%, whereas the non-connected area increased by 37.3%. This study provides a new avenue for label-free and continuous assessment of organoid or tumor vascularization in vitro.
   Submitted 24 February, 2025; originally announced February 2025.

9. arXiv:2502.17494 (https://arxiv.org/abs/2502.17494) [pdf, other] - cs.IR (Information Retrieval); cs.AI (Artificial Intelligence); cs.LG (Machine Learning)
   Title: External Large Foundation Model: How to Efficiently Serve Trillions of Parameters for Online Ads Recommendation
   Authors: Mingfu Liang, Xi Liu, Rong Jin, Boyang Liu, Qiuling Suo, Qinghai Zhou, Song Zhou, Laming Chen, Hua Zheng, Zhiyuan Li, Shali Jiang, Jiyan Yang, Xiaozhen Xia, Fan Yang, Yasmine Badr, Ellie Wen, Shuyu Xu, Hansey Chen, Zhengyu Zhang, Jade Nie, Chunzhi Yang, Zhichen Zeng, Weilin Zhang, Xingliang Huang, Qianru Li, et al. (74 additional authors not shown)
   Abstract: Ads recommendation is a prominent service of online advertising systems and has been actively studied. Recent studies indicate that scaling-up and advanced design of the recommendation model can bring significant performance improvement. However, with a larger model scale, such prior studies have a significantly increasing gap from industry as they often neglect two fundamental challenges in industrial-scale applications. First, training and inference budgets are restricted for the model to be served, exceeding which may incur latency and impair user experience. Second, large-volume data arrive in a streaming mode with data distributions dynamically shifting, as new users/ads join and existing users/ads leave the system. We propose the External Large Foundation Model (ExFM) framework to address the overlooked challenges. Specifically, we develop external distillation and a data augmentation system (DAS) to control the computational cost of training/inference while maintaining high performance. We design the teacher in a way like a foundation model (FM) that can serve multiple students as vertical models (VMs) to amortize its building cost. We propose Auxiliary Head and Student Adapter to mitigate the data distribution gap between FM and VMs caused by the streaming data issue. Comprehensive experiments on internal industrial-scale applications and public datasets demonstrate significant performance gain by ExFM.
   Submitted 26 February, 2025; v1 submitted 20 February, 2025; originally announced February 2025.
   Comments: Accepted by the ACM Web Conference (WWW) 2025 Industrial Track as Oral Presentation
We verify our proposed method on Universal Dependency Treebanks across 31 different languages and show it outperforms a series of competitors, together with experimental analysis to illustrate how our method works towards training a robust parser. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.17308v1-abstract-full').style.display = 'none'; document.getElementById('2502.17308v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages, 5 figures, 3 tables. Accepted by The 39th Annual AAAI Conference on Artificial Intelligence (AAAI 2025)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.17214">arXiv:2502.17214</a> <span> [<a href="https://arxiv.org/pdf/2502.17214">pdf</a>, <a href="https://arxiv.org/format/2502.17214">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> CoT-UQ: Improving Response-wise Uncertainty Quantification in LLMs with Chain-of-Thought </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+B">Boxuan Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Ruqi Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.17214v1-abstract-short" style="display: inline;"> Large language models (LLMs) excel in many tasks but struggle to accurately quantify uncertainty in their generated responses. This limitation makes it challenging to detect misinformation and ensure reliable decision-making. Existing uncertainty quantification (UQ) methods for LLMs are primarily prompt-wise rather than response-wise, often requiring multiple response samples, which incurs high co… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.17214v1-abstract-full').style.display = 'inline'; document.getElementById('2502.17214v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.17214v1-abstract-full" style="display: none;"> Large language models (LLMs) excel in many tasks but struggle to accurately quantify uncertainty in their generated responses. This limitation makes it challenging to detect misinformation and ensure reliable decision-making. Existing uncertainty quantification (UQ) methods for LLMs are primarily prompt-wise rather than response-wise, often requiring multiple response samples, which incurs high computational costs. Moreover, LLMs have been shown to be overconfident, particularly when using reasoning steps to derive their answers. 
In this work, we propose CoT-UQ, a response-wise UQ framework that integrates LLMs' inherent reasoning capabilities through Chain-of-Thought (CoT) into the UQ process. CoT-UQ captures critical information during inference by extracting keywords from each reasoning step and assessing their importance to the final answer. This key reasoning information is then aggregated to produce a final uncertainty estimate. We conduct extensive experiments based on LLaMA Family with model sizes varying from 8B to 13B across logical and mathematical reasoning tasks. Experimental results demonstrate that CoT-UQ significantly outperforms existing UQ methods, achieving an average improvement of 5.9% AUROC compared to current UQ methods. The code is available at: https://github.com/ZBox1005/CoT-UQ. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.17214v1-abstract-full').style.display = 'none'; document.getElementById('2502.17214v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.17175">arXiv:2502.17175</a> <span> [<a href="https://arxiv.org/pdf/2502.17175">pdf</a>, <a href="https://arxiv.org/format/2502.17175">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Linear Bandits on Ellipsoids: Minimax Optimal Algorithms </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Raymond Zhang</a>, <a href="/search/cs?searchtype=author&query=Hadiji%2C+H">Hedi Hadiji</a>, <a href="/search/cs?searchtype=author&query=Combes%2C+R">Richard Combes</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.17175v1-abstract-short" style="display: inline;"> We consider linear stochastic bandits where the set of actions is an ellipsoid. We provide the first known minimax optimal algorithm for this problem. We first derive a novel information-theoretic lower bound on the regret of any algorithm, which must be at least $惟(\min(d 蟽\sqrt{T} + d \|胃\|_{A}, \|胃\|_{A} T))$ where $d$ is the dimension, $T$ the time horizon, $蟽^2$ the noise variance, $A$ a matr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.17175v1-abstract-full').style.display = 'inline'; document.getElementById('2502.17175v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.17175v1-abstract-full" style="display: none;"> We consider linear stochastic bandits where the set of actions is an ellipsoid. We provide the first known minimax optimal algorithm for this problem. 
arXiv:2502.17175 (stat.ML, cs.LG) https://arxiv.org/abs/2502.17175
Title: Linear Bandits on Ellipsoids: Minimax Optimal Algorithms
Authors: Raymond Zhang, Hedi Hadiji, Richard Combes
Abstract: We consider linear stochastic bandits where the set of actions is an ellipsoid. We provide the first known minimax optimal algorithm for this problem. We first derive a novel information-theoretic lower bound on the regret of any algorithm, which must be at least $\Omega(\min(d \sigma \sqrt{T} + d \|\theta\|_{A}, \|\theta\|_{A} T))$, where $d$ is the dimension, $T$ the time horizon, $\sigma^2$ the noise variance, $A$ a matrix defining the set of actions, and $\theta$ the vector of unknown parameters. We then provide an algorithm whose regret matches this bound up to a multiplicative universal constant. The algorithm is non-classical in the sense that it is neither optimistic nor a sampling algorithm. The main idea is to combine a novel sequential procedure for estimating $\|\theta\|$ with an explore-and-commit strategy informed by this estimate. The algorithm is highly computationally efficient: a run requires only $O(dT + d^2 \log(T/d) + d^3)$ time and $O(d^2)$ memory, in contrast with known optimistic algorithms, which are not implementable in polynomial time. We go beyond minimax optimality and show that our algorithm is locally asymptotically minimax optimal, a much stronger notion of optimality. We further provide numerical experiments to illustrate our theoretical findings.
Submitted: 24 February, 2025; originally announced February 2025.
Comments: 20 pages, 3 figures.
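The abstract says the algorithm combines a norm-estimation procedure with an explore-and-commit strategy. Below is a sketch of the generic explore-then-commit template on an ellipsoidal action set, not the paper's minimax-optimal procedure: the parameterization {x : x^T A^{-1} x <= 1}, the Gaussian noise model, and the fixed exploration length are assumptions made for illustration.

# Generic explore-then-commit for a linear bandit on an ellipsoidal action set
# {x : x^T A^{-1} x <= 1}; an illustrative template only, not the paper's algorithm.
import numpy as np

rng = np.random.default_rng(0)
d, T, sigma = 5, 5000, 0.5
A = np.diag(rng.uniform(0.5, 2.0, size=d))          # ellipsoid shape matrix (assumed diagonal)
theta = rng.normal(size=d)                           # unknown parameter vector

def best_action(c, A):
    """argmax_{x : x^T A^{-1} x <= 1} c^T x has the closed form A c / sqrt(c^T A c)."""
    return A @ c / np.sqrt(c @ A @ c)

n_explore = int(np.sqrt(T)) * d
X, y = [], []
for _ in range(n_explore):                           # exploration phase: random feasible actions
    u = rng.normal(size=d)
    u /= np.linalg.norm(u)
    x = best_action(u, A) * rng.uniform()            # a random point inside the ellipsoid
    X.append(x)
    y.append(theta @ x + sigma * rng.normal())
theta_hat = np.linalg.lstsq(np.array(X), np.array(y), rcond=None)[0]

x_commit = best_action(theta_hat, A)                 # commit phase: play the greedy action
regret = (T - n_explore) * (theta @ best_action(theta, A) - theta @ x_commit)
print(f"parameter error: {np.linalg.norm(theta - theta_hat):.3f}, commit-phase regret: {regret:.1f}")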
arXiv:2502.16866 (cs.NI, cs.AI) https://arxiv.org/abs/2502.16866
Title: Toward Agentic AI: Generative Information Retrieval Inspired Intelligent Communications and Networking
Authors: Ruichen Zhang, Shunpu Tang, Yinqiu Liu, Dusit Niyato, Zehui Xiong, Sumei Sun, Shiwen Mao, Zhu Han
Abstract: The increasing complexity and scale of modern telecommunications networks demand intelligent automation to enhance efficiency, adaptability, and resilience. Agentic AI has emerged as a key paradigm for intelligent communications and networking, enabling AI-driven agents to perceive, reason, decide, and act within dynamic networking environments. However, effective decision-making in telecom applications, such as network planning, management, and resource allocation, requires integrating retrieval mechanisms that support multi-hop reasoning, historical cross-referencing, and compliance with evolving 3GPP standards. This article presents a forward-looking perspective on generative information retrieval-inspired intelligent communications and networking, emphasizing the role of knowledge acquisition, processing, and retrieval in agentic AI for telecom systems. We first provide a comprehensive review of generative information retrieval strategies, including traditional retrieval, hybrid retrieval, semantic retrieval, knowledge-based retrieval, and agentic contextual retrieval. We then analyze their advantages, limitations, and suitability for various networking scenarios. Next, we present a survey of their applications in communications and networking. Additionally, we introduce an agentic contextual retrieval framework to enhance telecom-specific planning by integrating multi-source retrieval, structured reasoning, and self-reflective validation. Experimental results demonstrate that our framework significantly improves answer accuracy, explanation consistency, and retrieval efficiency compared to traditional and semantic retrieval methods. Finally, we outline future research directions.
Submitted: 24 February, 2025; originally announced February 2025.
Comments: 7 pages, 4 figures.

arXiv:2502.16090 (cs.CL, cs.AI, cs.LG) https://arxiv.org/abs/2502.16090
Title: Echo: A Large Language Model with Temporal Episodic Memory
Authors: WenTao Liu, Ruohua Zhang, Aimin Zhou, Feng Gao, JiaLi Liu
Abstract: Research on large language models (LLMs) has shown remarkable performance in domains such as mathematics, programming, and literary creation. However, most studies have focused on semantic memory-based question answering, neglecting LLMs' potential to handle episodic memory (EM)-related queries. This oversight has led to suboptimal performance in applications requiring EM, including emotional companionship, personal AI assistants, and AI teachers. To address this gap, we introduce Echo, an LLM enhanced with temporal episodic memory. We propose a Multi-Agent Data Generation Framework that guides the model in generating multi-turn, complex-scenario episodic memory dialogue data (EM-Train). Temporal information is innovatively incorporated into the LLM training process, and Echo is trained using EM-Train. Furthermore, we develop an EM-Test benchmark specifically designed to evaluate LLMs' episodic memory capabilities. EM-Test assesses performance across various time spans and difficulty levels, providing a comprehensive evaluation of multi-turn episodic memory dialogues. Our experiments demonstrate that Echo significantly outperforms state-of-the-art LLMs on EM-Test.
Additionally, a qualitative analysis reveals Echo's potential to exhibit human-like episodic memory capabilities. We will open-source all datasets, code, and model weights.
Submitted: 22 February, 2025; originally announced February 2025.

arXiv:2502.15954 (cs.CL, cs.AI) https://arxiv.org/abs/2502.15954
Title: MMRAG: Multi-Mode Retrieval-Augmented Generation with Large Language Models for Biomedical In-Context Learning
Authors: Zaifu Zhan, Jun Wang, Shuang Zhou, Jiawen Deng, Rui Zhang
Abstract: Objective: To optimize in-context learning in biomedical natural language processing by improving example selection. Methods: We introduce a novel multi-mode retrieval-augmented generation (MMRAG) framework, which integrates four retrieval strategies: (1) Random Mode, selecting examples arbitrarily; (2) Top Mode, retrieving the most relevant examples based on similarity; (3) Diversity Mode, ensuring variation in selected examples; and (4) Class Mode, selecting category-representative examples. This study evaluates MMRAG on three core biomedical NLP tasks: Named Entity Recognition (NER), Relation Extraction (RE), and Text Classification (TC).
The datasets used include BC2GM for gene and protein mention recognition (NER), DDI for drug-drug interaction extraction (RE), GIT for general biomedical information extraction (RE), and HealthAdvice for health-related text classification (TC). The framework is tested with two large language models (Llama2-7B, Llama3-8B) and three retrievers (Contriever, MedCPT, BGE-Large) to assess performance across different retrieval strategies. Results: The results from Random Mode indicate that providing more examples in the prompt improves the model's generation performance. Meanwhile, Top Mode and Diversity Mode significantly outperform Random Mode on the RE (DDI) task, achieving an F1 score of 0.9669, a 26.4% improvement. Among the three retrievers tested, Contriever outperformed the other two in a greater number of experiments. Additionally, Llama 2 and Llama 3 demonstrated varying capabilities across different tasks, with Llama 3 showing a clear advantage in handling NER tasks. Conclusion: MMRAG effectively enhances biomedical in-context learning by refining example selection, mitigating data scarcity issues, and demonstrating superior adaptability for NLP-driven healthcare applications.
Submitted: 21 February, 2025; originally announced February 2025.
Comments: Submitted to JAMIA.
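The four example-selection modes named in the MMRAG abstract map naturally onto a small selection routine; the sketch below is only an illustration of those four strategies, with the similarity function, the greedy diversity heuristic, and the per-label representative rule assumed rather than taken from the paper.

# Sketch of the four MMRAG example-selection modes (Random, Top, Diversity, Class).
# Scoring and tie-breaking details are illustrative assumptions, not the paper's code.
import random
import numpy as np

def select_examples(mode, query_emb, pool, k=4):
    """pool: list of dicts with 'embedding' (np.ndarray) and 'label' keys."""
    sims = [float(query_emb @ ex["embedding"]) for ex in pool]
    ranked = sorted(range(len(pool)), key=lambda i: -sims[i])
    if mode == "random":                       # Random Mode: arbitrary examples
        return random.sample(pool, k)
    if mode == "top":                          # Top Mode: most similar examples
        return [pool[i] for i in ranked[:k]]
    if mode == "diversity":                    # Diversity Mode: greedily penalize redundancy
        chosen = [ranked[0]]
        while len(chosen) < k:
            def min_dist(i):
                return min(np.linalg.norm(pool[i]["embedding"] - pool[j]["embedding"]) for j in chosen)
            chosen.append(max((i for i in ranked if i not in chosen), key=min_dist))
        return [pool[i] for i in chosen]
    if mode == "class":                        # Class Mode: one representative per label
        by_label = {}
        for i in ranked:
            by_label.setdefault(pool[i]["label"], i)
        return [pool[i] for i in list(by_label.values())[:k]]
    raise ValueError(f"unknown mode: {mode}")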
arXiv:2502.15312 (cs.DC) https://arxiv.org/abs/2502.15312
Title: FlexPie: Accelerate Distributed Inference on Edge Devices with Flexible Combinatorial Optimization [Technical Report]
Authors: Runhua Zhang, Hongxu Jiang, Jinkun Geng, Yuhang Ma, Chenhui Zhu, Haojie Wang
Abstract: The rapid advancement of deep learning has catalyzed the development of novel IoT applications, which often deploy pre-trained deep neural network (DNN) models across multiple edge devices for collaborative inference.
Submitted: 21 February, 2025; originally announced February 2025.

arXiv:2502.14895 (cs.CV, eess.SP) https://arxiv.org/abs/2502.14895
Title: High-Dynamic Radar Sequence Prediction for Weather Nowcasting Using Spatiotemporal Coherent Gaussian Representation
Authors: Ziye Wang, Yiran Qin, Lin Zeng, Ruimao Zhang
Abstract: Weather nowcasting is an essential task that involves predicting future radar echo sequences based on current observations, offering significant benefits for disaster management, transportation, and urban planning. Current prediction methods are limited by training and storage efficiency, mainly focusing on 2D spatial predictions at specific altitudes. Meanwhile, 3D volumetric predictions at each timestamp remain largely unexplored.
To address this challenge, we introduce a comprehensive framework for 3D radar sequence prediction in weather nowcasting, using the newly proposed SpatioTemporal Coherent Gaussian Splatting (STC-GS) for dynamic radar representation and GauMamba for efficient and accurate forecasting. Specifically, rather than relying on a 4D Gaussian for dynamic scene reconstruction, STC-GS optimizes 3D scenes at each frame by employing a group of Gaussians while effectively capturing their movements across consecutive frames. It ensures consistent tracking of each Gaussian over time, making it particularly effective for prediction tasks. With the temporally correlated Gaussian groups established, we utilize them to train GauMamba, which integrates a memory mechanism into the Mamba framework. This allows the model to learn the temporal evolution of Gaussian groups while efficiently handling a large volume of Gaussian tokens. As a result, it achieves both efficiency and accuracy in forecasting a wide range of dynamic meteorological radar signals. Experimental results demonstrate that STC-GS can efficiently represent 3D radar sequences with over $16\times$ higher spatial resolution than existing 3D representation methods, while GauMamba outperforms state-of-the-art methods in forecasting a broad spectrum of high-dynamic weather conditions.
Submitted: 17 February, 2025; originally announced February 2025.
Comments: Accepted as an Oral paper at ICLR 2025.
Project page: https://ziyeeee.github.io/stcgs.github.io

arXiv:2502.14198 (cs.IT, eess.SP) https://arxiv.org/abs/2502.14198
Title: Antenna Position and Beamforming Optimization for Movable Antenna Enabled ISAC: Optimal Solutions and Efficient Algorithms
Authors: Lebin Chen, Ming-Min Zhao, Min-Jian Zhao, Rui Zhang
Abstract: In this paper, we propose an integrated sensing and communication (ISAC) system enabled by movable antennas (MAs), which can dynamically adjust antenna positions to enhance both sensing and communication performance for future wireless networks. To characterize the benefits of MA-enabled ISAC systems, we first derive the Cramér-Rao bound (CRB) for the angle estimation error, which is then minimized by optimizing the antenna position vector (APV) and the beamforming design, subject to a pre-defined signal-to-noise ratio (SNR) constraint that ensures the communication performance. In particular, for the case with receive MAs only, we provide a closed-form optimal antenna position solution and show that employing MAs over conventional fixed-position antennas (FPAs) can achieve a sensing performance gain upper-bounded by 4.77 dB. On the other hand, for the case with transmit MAs only, we develop a boundary traversal breadth-first search (BT-BFS) algorithm to obtain the global optimal solution in the line-of-sight (LoS) channel scenario, along with a lower-complexity boundary traversal depth-first search (BT-DFS) algorithm to find a local optimal solution efficiently.
In the scenario with non-LoS (NLoS) channels, a majorization-minimization (MM) based Rosen's gradient projection (RGP) algorithm with an efficient initialization method is proposed to obtain stationary solutions for the considered problem, and it can be extended to the general case with both transmit and receive MAs. Extensive numerical results are presented to verify the effectiveness of the proposed algorithms and to demonstrate the superiority of the considered MA-enabled ISAC system over conventional ISAC systems with FPAs in terms of the sensing and communication performance trade-off.
Submitted: 19 February, 2025; originally announced February 2025.
Comments: 13 pages, 7 figures.
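For orientation, 4.77 dB corresponds to a factor of roughly three on a linear scale; the abstract does not state that the bound arises as a factor-of-three CRB reduction, so the identity below is only a unit conversion, not a claim about the derivation.
$$10 \log_{10} 3 \approx 4.77\ \mathrm{dB}.$$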
arXiv:2502.13913 (cs.CL, cs.AI) https://arxiv.org/abs/2502.13913
Title: How Do LLMs Perform Two-Hop Reasoning in Context?
Authors: Tianyu Guo, Hanlin Zhu, Ruiqi Zhang, Jiantao Jiao, Song Mei, Michael I. Jordan, Stuart Russell
Abstract: "Socrates is human. All humans are mortal. Therefore, Socrates is mortal." This classical example demonstrates two-hop reasoning, where a conclusion logically follows from two connected premises. While transformer-based Large Language Models (LLMs) can perform two-hop reasoning, they tend to collapse to random guessing when faced with distracting premises. To understand the underlying mechanism, we train a three-layer transformer on synthetic two-hop reasoning tasks. The training dynamics show two stages: a slow learning phase, where the three-layer transformer performs random guessing like LLMs, followed by an abrupt phase transition, where it suddenly reaches $100\%$ accuracy. Through reverse engineering, we explain the inner mechanisms for how models learn to randomly guess between distractions initially, and how they learn to ignore distractions eventually. We further propose a three-parameter model that supports the causal claims about the mechanisms underlying the transformer's training dynamics. Finally, experiments on LLMs suggest that the discovered mechanisms generalize across scales. Our methodologies provide new perspectives for the scientific understanding of LLMs, and our findings offer new insights into how reasoning emerges during training.
Submitted: 19 February, 2025; originally announced February 2025.
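A synthetic in-context two-hop task of the kind the abstract describes (two connected premises plus distracting premises that share the same surface form) could be generated along the following lines; the entity vocabulary and prompt template are assumptions for illustration, not the paper's data pipeline.

# Illustrative generator for a synthetic two-hop example with distracting premises.
import random

def make_two_hop_example(n_distractors=2, seed=None):
    rng = random.Random(seed)
    entities = [f"e{i}" for i in range(10)]
    bridges  = [f"b{i}" for i in range(10)]
    attrs    = [f"a{i}" for i in range(10)]
    subj, bridge, attr = rng.choice(entities), rng.choice(bridges), rng.choice(attrs)
    premises = [f"{subj} is {bridge}.", f"All {bridge} are {attr}."]
    other_bridges = [b for b in bridges if b != bridge]
    for _ in range(n_distractors):               # distractors share the "All X are Y" form
        premises.append(f"All {rng.choice(other_bridges)} are {rng.choice(attrs)}.")
    rng.shuffle(premises)
    prompt = " ".join(premises) + f" Therefore, {subj} is"
    return prompt, attr                           # target completion is the correct attribute

prompt, answer = make_two_hop_example(seed=0)
print(prompt, "->", answer)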
arXiv:2502.13894 (cs.RO, cs.CV) https://arxiv.org/abs/2502.13894
Title: NavigateDiff: Visual Predictors are Zero-Shot Navigation Assistants
Authors: Yiran Qin, Ao Sun, Yuze Hong, Benyou Wang, Ruimao Zhang
Abstract: Navigating unfamiliar environments presents significant challenges for household robots, requiring the ability to recognize and reason about novel decoration and layout. Existing reinforcement learning methods cannot be directly transferred to new environments, as they typically rely on extensive mapping and exploration, which is time-consuming and inefficient. To address these challenges, we try to transfer the logical knowledge and generalization ability of pre-trained foundation models to zero-shot navigation. By integrating a large vision-language model with a diffusion network, our approach, named NavigateDiff, constructs a visual predictor that continuously predicts the agent's potential observations in the next step, which can assist robots in generating robust actions. Furthermore, to adapt to the temporal property of navigation, we introduce temporal historical information to ensure that the predicted image is aligned with the navigation scene. We then carefully design an information fusion framework that embeds the predicted future frames as guidance into a goal-reaching policy to solve downstream image navigation tasks. This approach enhances navigation control and generalization across both simulated and real-world environments. Through extensive experimentation, we demonstrate the robustness and versatility of our method, showcasing its potential to improve the efficiency and effectiveness of robotic navigation in diverse settings.
Submitted: 19 February, 2025; originally announced February 2025.
Comments: Accepted to ICRA 2025.

arXiv:2502.13719 (cs.IR, cs.AI) https://arxiv.org/abs/2502.13719
Title: TrustRAG: An Information Assistant with Retrieval Augmented Generation
Authors: Yixing Fan, Qiang Yan, Wenshan Wang, Jiafeng Guo, Ruqing Zhang, Xueqi Cheng
Abstract: Retrieval-augmented generation (RAG) has emerged as a crucial technique for enhancing large models with real-time and domain-specific knowledge. While numerous improvements and open-source tools have been proposed to refine the RAG framework for accuracy, relatively little attention has been given to improving the trustworthiness of generated results. To address this gap, we introduce TrustRAG, a novel framework that enhances RAG from three perspectives: indexing, retrieval, and generation. Specifically, in the indexing stage, we propose a semantic-enhanced chunking strategy that incorporates hierarchical indexing to supplement each chunk with contextual information, ensuring semantic completeness. In the retrieval stage, we introduce a utility-based filtering mechanism to identify high-quality information, supporting answer generation while reducing input length. In the generation stage, we propose fine-grained citation enhancement, which detects opinion-bearing sentences in responses and infers citation relationships at the sentence level, thereby improving citation accuracy.
We open-source the TrustRAG framework and provide a demonstration studio designed for excerpt-based question answering tasks (https://huggingface.co/spaces/golaxy/TrustRAG). Based on these, we aim to help researchers (1) systematically enhance the trustworthiness of RAG systems and (2) develop their own RAG systems with more reliable outputs.
Submitted: 19 February, 2025; originally announced February 2025.
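The three stages named in the TrustRAG abstract (context-aware indexing, utility-based filtering at retrieval, citation-aware generation) suggest the pipeline shape sketched below. The scoring callables, the chunking heuristic, and the citation rule are placeholders assumed for illustration, not the released implementation.

# Skeleton of a three-stage RAG pipeline organized as the abstract describes.
from typing import Callable, Dict, List

def index(documents: List[str], chunk: Callable[[str], List[str]]) -> List[Dict]:
    """Indexing: split each document into chunks and attach parent-document context."""
    store = []
    for doc_id, doc in enumerate(documents):
        for c in chunk(doc):
            store.append({"doc_id": doc_id, "text": c, "context": doc[:200]})
    return store

def retrieve(query: str, store: List[Dict], relevance, utility, k=5, tau=0.5) -> List[Dict]:
    """Retrieval: rank chunks by relevance, then keep only those whose utility clears a threshold."""
    ranked = sorted(store, key=lambda ch: relevance(query, ch["text"]), reverse=True)[:k]
    return [ch for ch in ranked if utility(query, ch["text"]) >= tau]

def generate(query: str, evidence: List[Dict], llm) -> Dict:
    """Generation: answer from the retained evidence and attach sentence-level citations."""
    answer = llm(query, [ch["text"] for ch in evidence])
    citations = {s: [ch["doc_id"] for ch in evidence] for s in answer.split(". ") if s}
    return {"answer": answer, "citations": citations}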
arXiv:2502.12900 (cs.CL, cs.AI, cs.SD) https://arxiv.org/abs/2502.12900
Title: Soundwave: Less is More for Speech-Text Alignment in LLMs
Authors: Yuhao Zhang, Zhiheng Liu, Fan Bu, Ruiyu Zhang, Benyou Wang, Haizhou Li
Abstract: Existing end-to-end speech large language models (LLMs) usually rely on large-scale annotated data for training, while data-efficient training has not been discussed in depth. We focus on two fundamental problems between speech and text: the representation space gap and sequence length inconsistency. We propose Soundwave, which utilizes an efficient training strategy and a novel architecture to address these issues. Results show that Soundwave outperforms the advanced Qwen2-Audio in speech translation and AIR-Bench speech tasks, using only one-fiftieth of the training data. Further analysis shows that Soundwave still retains its intelligence during conversation. The project is available at https://github.com/FreedomIntelligence/Soundwave.
Submitted: 18 February, 2025; originally announced February 2025.

arXiv:2502.12786 (stat.ML, cs.LG) https://arxiv.org/abs/2502.12786
Title: Composition and Control with Distilled Energy Diffusion Models and Sequential Monte Carlo
Authors: James Thornton, Louis Bethune, Ruixiang Zhang, Arwen Bradley, Preetum Nakkiran, Shuangfei Zhai
Abstract: Diffusion models may be formulated as a time-indexed sequence of energy-based models, where the score corresponds to the negative gradient of an energy function. As opposed to learning the score directly, an energy parameterization is attractive, as the energy itself can be used to control generation via Monte Carlo samplers. Architectural constraints and training instability in energy-parameterized models have so far yielded inferior performance compared to directly approximating the score or denoiser.
We address these deficiencies by introducing a novel training regime for the energy function through distillation of pre-trained diffusion models, resembling a Helmholtz decomposition of the score vector field. We further showcase the synergies between energy and score by casting the diffusion sampling procedure as a Feynman-Kac model, where sampling is controlled using potentials from the learnt energy functions. The Feynman-Kac formalism enables composition and low-temperature sampling through sequential Monte Carlo.
Submitted: 18 February, 2025; originally announced February 2025.
Comments: Initial submission to OpenReview on October 3, 2024 (https://openreview.net/forum?id=6GyX0YRw8P); accepted to AISTATS 2025.
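The core relation the abstract relies on, score as the negative gradient of a scalar energy, is easy to show concretely with autograd; the network below is an arbitrary placeholder, and the distillation objective and Feynman-Kac sampler from the paper are not reproduced.

# Minimal sketch of an energy-parameterized score: score(x, t) = -grad_x E(x, t).
import torch
import torch.nn as nn

class EnergyNet(nn.Module):
    def __init__(self, dim=2, hidden=128):
        super().__init__()
        self.net = nn.Sequential(nn.Linear(dim + 1, hidden), nn.SiLU(),
                                 nn.Linear(hidden, hidden), nn.SiLU(),
                                 nn.Linear(hidden, 1))

    def energy(self, x, t):
        return self.net(torch.cat([x, t[:, None]], dim=-1)).squeeze(-1)

    def score(self, x, t):
        """Score as the negative gradient of the scalar energy, via autograd."""
        x = x.requires_grad_(True)
        e = self.energy(x, t).sum()
        return -torch.autograd.grad(e, x, create_graph=self.training)[0]

model = EnergyNet()
x = torch.randn(4, 2)
t = torch.rand(4)
print(model.score(x, t).shape)   # torch.Size([4, 2])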
arXiv:2502.12514 (cs.RO) https://arxiv.org/abs/2502.12514
Title: Memory-updated-based Framework for 100% Reliable Flexible Flat Cables Insertion
Authors: Zhengrong Ling, Xiong Yang, Dong Guo, Hongyuan Chang, Tieshan Zhang, Ruijia Zhang, Yajing Shen
Abstract: Automatic assembly lines have increasingly replaced human labor in various tasks; however, the automation of Flexible Flat Cable (FFC) insertion remains unrealized due to its high requirement for effective feedback and dynamic operation, limiting approximately 11% of global industrial capacity. Although many approaches, such as vision-based tactile sensors and reinforcement learning, have been proposed, achieving human-like, highly reliable insertion (i.e., a 100% success rate in completed insertions) remains a major challenge. Drawing inspiration from human behavior in FFC insertion, which involves sensing three-dimensional forces, translating them into physical concepts, and continuously improving estimates, we propose a novel framework. This framework includes a sensing module for collecting three-dimensional tactile data, a perception module for interpreting this data into meaningful physical signals, and a memory module based on Bayesian theory for reliability estimation and control. This strategy enables the robot to accurately assess its physical state and generate reliable status estimations and corrective actions. Experimental results demonstrate that a robot using this framework can detect alignment errors of 0.5 mm with an accuracy of 97.92% and then achieve a 100% success rate in all completed tests after a few iterations. This work addresses the challenges of unreliable perception and control in complex insertion tasks, highlighting the path toward the development of fully automated production lines.
Submitted: 17 February, 2025; originally announced February 2025.
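The "memory module based on Bayesian theory for reliability estimation" could take many forms; a Beta-Bernoulli update is one simple instance, shown below purely for illustration, since the abstract does not state which prior or likelihood the authors use.

# Sketch of a Bayesian reliability-estimation memory (Beta-Bernoulli model assumed).
from dataclasses import dataclass

@dataclass
class ReliabilityMemory:
    alpha: float = 1.0   # prior pseudo-count of successful insertions
    beta: float = 1.0    # prior pseudo-count of failed insertions

    def update(self, success: bool) -> None:
        """Conjugate Beta update after each attempted insertion."""
        if success:
            self.alpha += 1.0
        else:
            self.beta += 1.0

    @property
    def estimate(self) -> float:
        """Posterior mean of the insertion success probability."""
        return self.alpha / (self.alpha + self.beta)

mem = ReliabilityMemory()
for outcome in [True, True, False, True]:
    mem.update(outcome)
print(f"estimated reliability: {mem.estimate:.2f}")   # 0.67 after 3 successes, 1 failure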
arXiv:2502.12102 (cs.AI, cs.ET) https://arxiv.org/abs/2502.12102
Title: Relational Norms for Human-AI Cooperation
Authors: Brian D. Earp, Sebastian Porsdam Mann, Mateo Aboy, Edmond Awad, Monika Betzler, Marietjie Botes, Rachel Calcott, Mina Caraccio, Nick Chater, Mark Coeckelbergh, Mihaela Constantinescu, Hossein Dabbagh, Kate Devlin, Xiaojun Ding, Vilius Dranseika, Jim A. C. Everett, Ruiping Fan, Faisal Feroz, Kathryn B. Francis, Cindy Friedman, Orsolya Friedrich, Iason Gabriel, Ivar Hannikainen, Julie Hellmann, Arasj Khodadade Jahrome, et al. (37 additional authors not shown)
Abstract: How we should design and interact with social artificial intelligence depends on the socio-relational role the AI is meant to emulate or occupy. In human society, relationships such as teacher-student, parent-child, neighbors, siblings, or employer-employee are governed by specific norms that prescribe or proscribe cooperative functions including hierarchy, care, transaction, and mating. These norms shape our judgments of what is appropriate for each partner. For example, workplace norms may allow a boss to give orders to an employee, but not vice versa, reflecting hierarchical and transactional expectations.
As AI agents and chatbots powered by large language models are increasingly designed to serve roles analogous to human positions, such as assistant, mental health provider, tutor, or romantic partner, it is imperative to examine whether and how human relational norms should extend to human-AI interactions. Our analysis explores how differences between AI systems and humans, such as the absence of conscious experience and immunity to fatigue, may affect an AI's capacity to fulfill relationship-specific functions and adhere to corresponding norms. This analysis, which is a collaborative effort by philosophers, psychologists, relationship scientists, ethicists, legal experts, and AI researchers, carries important implications for AI systems design, user behavior, and regulation. While we accept that AI systems can offer significant benefits such as increased availability and consistency in certain socio-relational roles, they also risk fostering unhealthy dependencies or unrealistic expectations that could spill over into human-human relationships. We propose that understanding and thoughtfully shaping (or implementing) suitable human-AI relational norms will be crucial for ensuring that human-AI interactions are ethical, trustworthy, and favorable to human well-being.
Submitted: 17 February, 2025; originally announced February 2025.
Comments: 76 pages, 2 figures.

arXiv:2502.11767 (cs.LG, cs.CL) https://arxiv.org/abs/2502.11767
Title: From Selection to Generation: A Survey of LLM-based Active Learning
Authors: Yu Xia, Subhojyoti Mukherjee, Zhouhang Xie, Junda Wu, Xintong Li, Ryan Aponte, Hanjia Lyu, Joe Barrow, Hongjie Chen, Franck Dernoncourt, Branislav Kveton, Tong Yu, Ruiyi Zhang, Jiuxiang Gu, Nesreen K. Ahmed, Yu Wang, Xiang Chen, Hanieh Deilamsalehy, Sungchul Kim, Zhengmian Hu, Yue Zhao, Nedim Lipka, Seunghyun Yoon, Ting-Hao Kenneth Huang, Zichao Wang, et al. (9 additional authors not shown)
href="/search/cs?searchtype=author&query=Zhang%2C+R">Ruiyi Zhang</a>, <a href="/search/cs?searchtype=author&query=Gu%2C+J">Jiuxiang Gu</a>, <a href="/search/cs?searchtype=author&query=Ahmed%2C+N+K">Nesreen K. Ahmed</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yu Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xiang Chen</a>, <a href="/search/cs?searchtype=author&query=Deilamsalehy%2C+H">Hanieh Deilamsalehy</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+S">Sungchul Kim</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+Z">Zhengmian Hu</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+Y">Yue Zhao</a>, <a href="/search/cs?searchtype=author&query=Lipka%2C+N">Nedim Lipka</a>, <a href="/search/cs?searchtype=author&query=Yoon%2C+S">Seunghyun Yoon</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+T+K">Ting-Hao Kenneth Huang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zichao Wang</a> , et al. (9 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.11767v1-abstract-short" style="display: inline;"> Active Learning (AL) has been a powerful paradigm for improving model efficiency and performance by selecting the most informative data points for labeling and training. In recent active learning frameworks, Large Language Models (LLMs) have been employed not only for selection but also for generating entirely new data instances and providing more cost-effective annotations. Motivated by the incre… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11767v1-abstract-full').style.display = 'inline'; document.getElementById('2502.11767v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11767v1-abstract-full" style="display: none;"> Active Learning (AL) has been a powerful paradigm for improving model efficiency and performance by selecting the most informative data points for labeling and training. In recent active learning frameworks, Large Language Models (LLMs) have been employed not only for selection but also for generating entirely new data instances and providing more cost-effective annotations. Motivated by the increasing importance of high-quality data and efficient model training in the era of LLMs, we present a comprehensive survey on LLM-based Active Learning. We introduce an intuitive taxonomy that categorizes these techniques and discuss the transformative roles LLMs can play in the active learning loop. We further examine the impact of AL on LLM learning paradigms and its applications across various domains. Finally, we identify open challenges and propose future research directions. This survey aims to serve as an up-to-date resource for researchers and practitioners seeking to gain an intuitive understanding of LLM-based AL techniques and deploy them to new applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11767v1-abstract-full').style.display = 'none'; document.getElementById('2502.11767v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11386">arXiv:2502.11386</a> <span> [<a href="https://arxiv.org/pdf/2502.11386">pdf</a>, <a href="https://arxiv.org/format/2502.11386">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Intelligent Mobile AI-Generated Content Services via Interactive Prompt Engineering and Dynamic Service Provisioning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yinqiu Liu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Ruichen Zhang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jiacheng Wang</a>, <a href="/search/cs?searchtype=author&query=Niyato%2C+D">Dusit Niyato</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xianbin Wang</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+D+I">Dong In Kim</a>, <a href="/search/cs?searchtype=author&query=Du%2C+H">Hongyang Du</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.11386v1-abstract-short" style="display: inline;"> Due to massive computational demands of large generative models, AI-Generated Content (AIGC) can organize collaborative Mobile AIGC Service Providers (MASPs) at network edges to provide ubiquitous and customized content generation for resource-constrained users. However, such a paradigm faces two significant challenges: 1) raw prompts (i.e., the task description from users) often lead to poor gene… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11386v1-abstract-full').style.display = 'inline'; document.getElementById('2502.11386v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11386v1-abstract-full" style="display: none;"> Due to massive computational demands of large generative models, AI-Generated Content (AIGC) can organize collaborative Mobile AIGC Service Providers (MASPs) at network edges to provide ubiquitous and customized content generation for resource-constrained users. However, such a paradigm faces two significant challenges: 1) raw prompts (i.e., the task description from users) often lead to poor generation quality due to users' lack of experience with specific AIGC models, and 2) static service provisioning fails to efficiently utilize computational and communication resources given the heterogeneity of AIGC tasks. To address these challenges, we propose an intelligent mobile AIGC service scheme. Firstly, we develop an interactive prompt engineering mechanism that leverages a Large Language Model (LLM) to generate customized prompt corpora and employs Inverse Reinforcement Learning (IRL) for policy imitation through small-scale expert demonstrations. Secondly, we formulate a dynamic mobile AIGC service provisioning problem that jointly optimizes the number of inference trials and transmission power allocation. Then, we propose the Diffusion-Enhanced Deep Deterministic Policy Gradient (D3PG) algorithm to solve the problem. 
By incorporating the diffusion process into Deep Reinforcement Learning (DRL) architecture, the environment exploration capability can be improved, thus adapting to varying mobile AIGC scenarios. Extensive experimental results demonstrate that our prompt engineering approach improves single-round generation success probability by 6.3 times, while D3PG increases the user service experience by 67.8% compared to baseline DRL approaches. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11386v1-abstract-full').style.display = 'none'; document.getElementById('2502.11386v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11346">arXiv:2502.11346</a> <span> [<a href="https://arxiv.org/pdf/2502.11346">pdf</a>, <a href="https://arxiv.org/ps/2502.11346">ps</a>, <a href="https://arxiv.org/format/2502.11346">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> Power-Measurement-Based Channel Autocorrelation Estimation for IRS-Assisted Wideband Communications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sun%2C+H">He Sun</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+L">Lipeng Zhu</a>, <a href="/search/cs?searchtype=author&query=Mei%2C+W">Weidong Mei</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Rui Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.11346v1-abstract-short" style="display: inline;"> Channel state information (CSI) is essential to the performance optimization of intelligent reflecting surface (IRS)-aided wireless communication systems. However, the passive and frequency-flat reflection of IRS, as well as the high-dimensional IRS-reflected channels, have posed practical challenges for efficient IRS channel estimation, especially in wideband communication systems with significan… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11346v1-abstract-full').style.display = 'inline'; document.getElementById('2502.11346v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11346v1-abstract-full" style="display: none;"> Channel state information (CSI) is essential to the performance optimization of intelligent reflecting surface (IRS)-aided wireless communication systems. However, the passive and frequency-flat reflection of IRS, as well as the high-dimensional IRS-reflected channels, have posed practical challenges for efficient IRS channel estimation, especially in wideband communication systems with significant multi-path channel delay spread. 
To tackle the above challenge, we propose a novel neural network (NN)-empowered IRS channel estimation and passive reflection design framework for the wideband orthogonal frequency division multiplexing (OFDM) communication system based only on the user's reference signal received power (RSRP) measurements with time-varying random IRS training reflections. In particular, we show that the average received signal power over all OFDM subcarriers at the user terminal can be represented as the prediction of a single-layer NN composed of multiple subnetworks with the same structure, such that the autocorrelation matrix of the wideband IRS channel can be recovered as their weights via supervised learning. To exploit the potential sparsity of the channel autocorrelation matrix, a progressive training method is proposed by gradually increasing the number of subnetworks until a desired accuracy is achieved, thus reducing the training complexity. Based on the estimates of IRS channel autocorrelation matrix, the IRS passive reflection is then optimized to maximize the average channel power gain over all subcarriers. Numerical results indicate the effectiveness of the proposed IRS channel autocorrelation matrix estimation and passive reflection design under wideband channels, which can achieve significant performance improvement compared to the existing IRS reflection designs based on user power measurements. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11346v1-abstract-full').style.display = 'none'; document.getElementById('2502.11346v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">arXiv admin note: text overlap with arXiv:2411.00374</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11193">arXiv:2502.11193</a> <span> [<a href="https://arxiv.org/pdf/2502.11193">pdf</a>, <a href="https://arxiv.org/format/2502.11193">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Large Language Models Penetration in Scholarly Writing and Peer Review </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhou%2C+L">Li Zhou</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Ruijie Zhang</a>, <a href="/search/cs?searchtype=author&query=Dai%2C+X">Xunlian Dai</a>, <a href="/search/cs?searchtype=author&query=Hershcovich%2C+D">Daniel Hershcovich</a>, <a href="/search/cs?searchtype=author&query=Li%2C+H">Haizhou Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.11193v1-abstract-short" style="display: inline;"> While the widespread use of Large Language Models (LLMs) brings convenience, it also raises concerns about the credibility of academic research and scholarly processes. 
To better understand these dynamics, we evaluate the penetration of LLMs across academic workflows from multiple perspectives and dimensions, providing compelling evidence of their growing influence. We propose a framework with two… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11193v1-abstract-full').style.display = 'inline'; document.getElementById('2502.11193v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11193v1-abstract-full" style="display: none;"> While the widespread use of Large Language Models (LLMs) brings convenience, it also raises concerns about the credibility of academic research and scholarly processes. To better understand these dynamics, we evaluate the penetration of LLMs across academic workflows from multiple perspectives and dimensions, providing compelling evidence of their growing influence. We propose a framework with two components: \texttt{ScholarLens}, a curated dataset of human- and LLM-generated content across scholarly writing and peer review for multi-perspective evaluation, and \texttt{LLMetrica}, a tool for assessing LLM penetration using rule-based metrics and model-based detectors for multi-dimensional evaluation. Our experiments demonstrate the effectiveness of \texttt{LLMetrica}, revealing the increasing role of LLMs in scholarly processes. These findings emphasize the need for transparency, accountability, and ethical practices in LLM usage to maintain academic credibility. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11193v1-abstract-full').style.display = 'none'; document.getElementById('2502.11193v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
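<p> LLMetrica is described as combining rule-based metrics with model-based detectors. The snippet below sketches one generic rule-based signal, the rate of stylistic marker words per 1,000 tokens; the marker list and threshold are invented for illustration and are not the metrics the paper actually uses. </p> <pre><code>
# Toy sketch of a single rule-based "LLM penetration" signal: the rate of
# stylistic marker words per 1,000 tokens. Marker list and threshold are
# invented for illustration, not the rules used by LLMetrica.
import re

MARKERS = {"delve", "intricate", "underscore", "pivotal", "showcasing"}  # hypothetical

def marker_rate(text: str) -> float:
    tokens = re.findall(r"[a-z]+", text.lower())
    if not tokens:
        return 0.0
    hits = sum(t in MARKERS for t in tokens)
    return 1000.0 * hits / len(tokens)

def flag_documents(docs, threshold=5.0):
    """Return the fraction of documents whose marker rate exceeds the threshold."""
    flagged = [d for d in docs if marker_rate(d) > threshold]
    return len(flagged) / len(docs)

reviews = [
    "We delve into an intricate and pivotal problem, showcasing consistent gains.",
    "Solid experiments; the ablation in section 4.2 could be expanded.",
]
print([round(marker_rate(d), 1) for d in reviews], flag_documents(reviews))
</code></pre>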
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Transparency in NLP, LLM-generated text evaluation and detection, LLM Penetration, Scholarly Credibility and Accountability</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11024">arXiv:2502.11024</a> <span> [<a href="https://arxiv.org/pdf/2502.11024">pdf</a>, <a href="https://arxiv.org/format/2502.11024">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> TPCap: Unlocking Zero-Shot Image Captioning with Trigger-Augmented and Multi-Modal Purification Modules </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Ruoyu Zhang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+L">Lulu Wang</a>, <a href="/search/cs?searchtype=author&query=He%2C+Y">Yi He</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+T">Tongling Pan</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+Z">Zhengtao Yu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yingna Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.11024v1-abstract-short" style="display: inline;"> Recent advancements in large language models (LLMs) have significantly enhanced the fluency and logical coherence of image captioning. Retrieval-Augmented Generation (RAG) is widely adopted to incorporate external knowledge into LLMs; however, existing RAG-based methods rely on separate retrieval banks, introducing computational overhead and limiting the utilization of LLMs' inherent zero-shot cap… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11024v1-abstract-full').style.display = 'inline'; document.getElementById('2502.11024v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11024v1-abstract-full" style="display: none;"> Recent advancements in large language models (LLMs) have significantly enhanced the fluency and logical coherence of image captioning. Retrieval-Augmented Generation (RAG) is widely adopted to incorporate external knowledge into LLMs; however, existing RAG-based methods rely on separate retrieval banks, introducing computational overhead and limiting the utilization of LLMs' inherent zero-shot capabilities. To address these limitations, we propose TPCap, a novel trigger-augmented and multi-modal purification framework for zero-shot image captioning without external retrieval libraries. TPCap consists of two key components: trigger-augmented (TA) generation and multi-modal purification (MP). The TA module employs a trigger projector with frozen and learnable projections to activate LLMs' contextual reasoning, enhance visual-textual alignment, and mitigate data bias. The MP module further refines the generated entity-related information by filtering noise and enhancing feature quality, ensuring more precise and factually consistent captions. We evaluate TPCap on COCO, NoCaps, Flickr30k, and WHOOPS datasets. 
With only 0.82M trainable parameters and training on a single NVIDIA RTX 4090 GPU, TPCap achieves competitive performance comparable to state-of-the-art models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11024v1-abstract-full').style.display = 'none'; document.getElementById('2502.11024v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.10940">arXiv:2502.10940</a> <span> [<a href="https://arxiv.org/pdf/2502.10940">pdf</a>, <a href="https://arxiv.org/format/2502.10940">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> CoLA: Compute-Efficient Pre-Training of LLMs via Low-Rank Activation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Ziyue Liu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Ruijie Zhang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zhengyang Wang</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Z">Zi Yang</a>, <a href="/search/cs?searchtype=author&query=Hovland%2C+P">Paul Hovland</a>, <a href="/search/cs?searchtype=author&query=Nicolae%2C+B">Bogdan Nicolae</a>, <a href="/search/cs?searchtype=author&query=Cappello%2C+F">Franck Cappello</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zheng Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.10940v1-abstract-short" style="display: inline;"> Large language models (LLMs) are revolutionizing many science and engineering fields. However, their huge model sizes impose extremely demanding needs of computational resources in the pre-training stage. Although low-rank factorizations can reduce model parameters, their direct application in LLM pre-training often lead to non-negligible performance loss. To address this fundamental challenge, we… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10940v1-abstract-full').style.display = 'inline'; document.getElementById('2502.10940v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.10940v1-abstract-full" style="display: none;"> Large language models (LLMs) are revolutionizing many science and engineering fields. However, their huge model sizes impose extremely demanding needs of computational resources in the pre-training stage. Although low-rank factorizations can reduce model parameters, their direct application in LLM pre-training often lead to non-negligible performance loss. To address this fundamental challenge, we introduce CoLA and its memory-efficient implementation, CoLA-M. 
We leverage the low-rank structure observed widely in model activations, enforcing non-linear transformations between factorized weight matrices to reduce model size, boost model capacity and training efficiency. Experiments on LLaMA models with 60 million to 7 billion parameters show that CoLA reduces the computing cost by $\bf 2\pmb{\times}$ and improves training throughput by $\bf 1.86\pmb{\times}$ while maintaining full-rank level performance. CoLA-M further squeezes memory cost without sacrificing throughput, offering a pre-training approach with collectively superior parameter, computing, and memory efficiency. The LLMs produced are also $\bf 2\pmb{\times}$ smaller, enabling faster inference with lower memory cost on resource-constrained platforms <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10940v1-abstract-full').style.display = 'none'; document.getElementById('2502.10940v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Preprint</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09914">arXiv:2502.09914</a> <span> [<a href="https://arxiv.org/pdf/2502.09914">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> A Deep Learning Approach to Interface Color Quality Assessment in HCI </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shixiao Wang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Runsheng Zhang</a>, <a href="/search/cs?searchtype=author&query=Du%2C+J">Junliang Du</a>, <a href="/search/cs?searchtype=author&query=Hao%2C+R">Ran Hao</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+J">Jiacheng Hu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09914v1-abstract-short" style="display: inline;"> In this paper, a quantitative evaluation model for the color quality of human-computer interaction interfaces is proposed by combining deep convolutional neural networks (CNN). By extracting multidimensional features of interface images, including hue, brightness, purity, etc., CNN is used for efficient feature modeling and quantitative analysis, and the relationship between interface design and u… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09914v1-abstract-full').style.display = 'inline'; document.getElementById('2502.09914v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09914v1-abstract-full" style="display: none;"> In this paper, a quantitative evaluation model for the color quality of human-computer interaction interfaces is proposed by combining deep convolutional neural networks (CNN). 
By extracting multidimensional features of interface images, including hue, brightness, purity, etc., CNN is used for efficient feature modeling and quantitative analysis, and the relationship between interface design and user perception is studied. The experiment is based on multiple international mainstream website interface datasets, covering e-commerce platforms, social media, education platforms, etc., and verifies the evaluation effect of the model on indicators such as contrast, clarity, color coordination, and visual appeal. The results show that the CNN evaluation is highly consistent with the user rating, with a correlation coefficient of up to 0.96, and it also shows high accuracy in mean square error and absolute error. Compared with traditional experience-based evaluation methods, the proposed model can efficiently and scientifically capture the visual characteristics of the interface and avoid the influence of subjective factors. Future research can explore the introduction of multimodal data (such as text and interactive behavior) into the model to further enhance the evaluation ability of dynamic interfaces and expand it to fields such as smart homes, medical systems, and virtual reality. This paper provides new methods and new ideas for the scientific evaluation and optimization of interface design. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09914v1-abstract-full').style.display = 'none'; document.getElementById('2502.09914v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09860">arXiv:2502.09860</a> <span> [<a href="https://arxiv.org/pdf/2502.09860">pdf</a>, <a href="https://arxiv.org/format/2502.09860">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Gradient GA: Gradient Genetic Algorithm for Drug Molecular Design </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhuang%2C+C">Chris Zhuang</a>, <a href="/search/cs?searchtype=author&query=Mukherjee%2C+D">Debadyuti Mukherjee</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+Y">Yingzhou Lu</a>, <a href="/search/cs?searchtype=author&query=Fu%2C+T">Tianfan Fu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Ruqi Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09860v1-abstract-short" style="display: inline;"> Molecular discovery has brought great benefits to the chemical industry. Various molecule design techniques are developed to identify molecules with desirable properties. 
Traditional optimization methods, such as genetic algorithms, continue to achieve state-of-the-art results across multiple molecular design benchmarks. However, these techniques rely solely on random walk exploration, which hinde… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09860v1-abstract-full').style.display = 'inline'; document.getElementById('2502.09860v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09860v1-abstract-full" style="display: none;"> Molecular discovery has brought great benefits to the chemical industry. Various molecule design techniques are developed to identify molecules with desirable properties. Traditional optimization methods, such as genetic algorithms, continue to achieve state-of-the-art results across multiple molecular design benchmarks. However, these techniques rely solely on random walk exploration, which hinders both the quality of the final solution and the convergence speed. To address this limitation, we propose a novel approach called Gradient Genetic Algorithm (Gradient GA), which incorporates gradient information from the objective function into genetic algorithms. Instead of random exploration, each proposed sample iteratively progresses toward an optimal solution by following the gradient direction. We achieve this by designing a differentiable objective function parameterized by a neural network and utilizing the Discrete Langevin Proposal to enable gradient guidance in discrete molecular spaces. Experimental results demonstrate that our method significantly improves both convergence speed and solution quality, outperforming cutting-edge techniques. For example, it achieves up to a 25% improvement in the top-10 score over the vanilla genetic algorithm. The code is publicly available at https://github.com/debadyuti23/GradientGA. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09860v1-abstract-full').style.display = 'none'; document.getElementById('2502.09860v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
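<p> The abstract above describes steering genetic-algorithm mutations with gradients of a differentiable surrogate objective via a Discrete Langevin Proposal. The toy below only illustrates that general idea on binary strings, with a hand-written surrogate and a crude gradient-biased bit-flip standing in for the paper's neural surrogate and molecular representation. </p> <pre><code>
# Toy sketch of a gradient-guided genetic algorithm on binary strings.
# The real paper targets molecules with a learned (neural) surrogate and a
# Discrete Langevin Proposal; here a hand-written differentiable surrogate
# and a simple gradient-biased bit-flip stand in for both.
import math, random

TARGET = [1, 0, 1, 1, 0, 0, 1, 0]           # hidden optimum of the toy objective

def surrogate(x):                            # differentiable surrogate objective
    return -sum((xi - ti) ** 2 for xi, ti in zip(x, TARGET))

def grad(x):                                 # analytic gradient of the surrogate
    return [-2 * (xi - ti) for xi, ti in zip(x, TARGET)]

def gradient_mutation(x, temp=1.0):
    """Flip each bit with probability that grows with the improvement the
    gradient predicts for flipping it (a crude Langevin-style proposal)."""
    g = grad(x)
    y = list(x)
    for i, xi in enumerate(x):
        predicted_gain = g[i] * ((1 - xi) - xi)   # first-order effect of a flip
        p = 1.0 / (1.0 + math.exp(-predicted_gain / temp))
        if random.random() < p:
            y[i] = 1 - xi
    return y

def crossover(a, b):
    cut = random.randrange(1, len(a))
    return a[:cut] + b[cut:]

pop = [[random.randint(0, 1) for _ in range(len(TARGET))] for _ in range(20)]
for _ in range(30):
    pop.sort(key=surrogate, reverse=True)
    parents = pop[:10]
    children = [gradient_mutation(crossover(random.choice(parents), random.choice(parents)))
                for _ in range(10)]
    pop = parents + children
print("best fitness:", surrogate(max(pop, key=surrogate)))
</code></pre>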
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09621">arXiv:2502.09621</a> <span> [<a href="https://arxiv.org/pdf/2502.09621">pdf</a>, <a href="https://arxiv.org/format/2502.09621">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> MME-CoT: Benchmarking Chain-of-Thought in Large Multimodal Models for Reasoning Quality, Robustness, and Efficiency </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jiang%2C+D">Dongzhi Jiang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Renrui Zhang</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+Z">Ziyu Guo</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yanwei Li</a>, <a href="/search/cs?searchtype=author&query=Qi%2C+Y">Yu Qi</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xinyan Chen</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+L">Liuhui Wang</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+J">Jianhan Jin</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+C">Claire Guo</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+S">Shen Yan</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+B">Bo Zhang</a>, <a href="/search/cs?searchtype=author&query=Fu%2C+C">Chaoyou Fu</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+P">Peng Gao</a>, <a href="/search/cs?searchtype=author&query=Li%2C+H">Hongsheng Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09621v1-abstract-short" style="display: inline;"> Answering questions with Chain-of-Thought (CoT) has significantly enhanced the reasoning capabilities of Large Language Models (LLMs), yet its impact on Large Multimodal Models (LMMs) still lacks a systematic assessment and in-depth investigation. In this paper, we introduce MME-CoT, a specialized benchmark evaluating the CoT reasoning performance of LMMs, spanning six domains: math, science, OCR,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09621v1-abstract-full').style.display = 'inline'; document.getElementById('2502.09621v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09621v1-abstract-full" style="display: none;"> Answering questions with Chain-of-Thought (CoT) has significantly enhanced the reasoning capabilities of Large Language Models (LLMs), yet its impact on Large Multimodal Models (LMMs) still lacks a systematic assessment and in-depth investigation. In this paper, we introduce MME-CoT, a specialized benchmark evaluating the CoT reasoning performance of LMMs, spanning six domains: math, science, OCR, logic, space-time, and general scenes. As the first comprehensive study in this area, we propose a thorough evaluation suite incorporating three novel metrics that assess the reasoning quality, robustness, and efficiency at a fine-grained level. 
Leveraging curated high-quality data and a unique evaluation strategy, we conduct an in-depth analysis of state-of-the-art LMMs, uncovering several key insights: 1) Models with reflection mechanism demonstrate a superior CoT quality, with Kimi k1.5 outperforming GPT-4o and demonstrating the highest quality results; 2) CoT prompting often degrades LMM performance on perception-heavy tasks, suggesting a potentially harmful overthinking behavior; and 3) Although the CoT quality is high, LMMs with reflection exhibit significant inefficiency in both normal response and self-correction phases. We hope MME-CoT serves as a foundation for advancing multimodal reasoning in LMMs. Project Page: https://mmecot.github.io/ <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09621v1-abstract-full').style.display = 'none'; document.getElementById('2502.09621v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Project Page: https://mmecot.github.io/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09620">arXiv:2502.09620</a> <span> [<a href="https://arxiv.org/pdf/2502.09620">pdf</a>, <a href="https://arxiv.org/format/2502.09620">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Exploring the Potential of Encoder-free Architectures in 3D LMMs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tang%2C+Y">Yiwen Tang</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+Z">Zoey Guo</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zhuhao Wang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Ray Zhang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Q">Qizhi Chen</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Junli Liu</a>, <a href="/search/cs?searchtype=author&query=Qu%2C+D">Delin Qu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zhigang Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+D">Dong Wang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xuelong Li</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+B">Bin Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09620v1-abstract-short" style="display: inline;"> Encoder-free architectures have been preliminarily explored in the 2D visual domain, yet it remains an open question whether they can be effectively applied to 3D understanding scenarios. 
In this paper, we present the first comprehensive investigation into the potential of encoder-free architectures to overcome the challenges of encoder-based 3D Large Multimodal Models (LMMs). These challenges inc… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09620v1-abstract-full').style.display = 'inline'; document.getElementById('2502.09620v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09620v1-abstract-full" style="display: none;"> Encoder-free architectures have been preliminarily explored in the 2D visual domain, yet it remains an open question whether they can be effectively applied to 3D understanding scenarios. In this paper, we present the first comprehensive investigation into the potential of encoder-free architectures to overcome the challenges of encoder-based 3D Large Multimodal Models (LMMs). These challenges include the failure to adapt to varying point cloud resolutions and the point features from the encoder not meeting the semantic needs of Large Language Models (LLMs). We identify key aspects for 3D LMMs to remove the encoder and enable the LLM to assume the role of the 3D encoder: 1) We propose the LLM-embedded Semantic Encoding strategy in the pre-training stage, exploring the effects of various point cloud self-supervised losses. And we present the Hybrid Semantic Loss to extract high-level semantics. 2) We introduce the Hierarchical Geometry Aggregation strategy in the instruction tuning stage. This incorporates inductive bias into the LLM early layers to focus on the local details of the point clouds. To the end, we present the first Encoder-free 3D LMM, ENEL. Our 7B model rivals the current state-of-the-art model, ShapeLLM-13B, achieving 55.0%, 50.92%, and 42.7% on the classification, captioning, and VQA tasks, respectively. Our results demonstrate that the encoder-free architecture is highly promising for replacing encoder-based architectures in the field of 3D understanding. The code is released at https://github.com/Ivan-Tang-3D/ENEL <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09620v1-abstract-full').style.display = 'none'; document.getElementById('2502.09620v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">The code is released at https://github.com/Ivan-Tang-3D/ENEL</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.07942">arXiv:2502.07942</a> <span> [<a href="https://arxiv.org/pdf/2502.07942">pdf</a>, <a href="https://arxiv.org/format/2502.07942">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Symbiotic Cooperation for Web Agents: Harnessing Complementary Strengths of Large and Small LLMs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Ruichen Zhang</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+M">Mufan Qiu</a>, <a href="/search/cs?searchtype=author&query=Tan%2C+Z">Zhen Tan</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+M">Mohan Zhang</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+V">Vincent Lu</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+J">Jie Peng</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+K">Kaidi Xu</a>, <a href="/search/cs?searchtype=author&query=Agudelo%2C+L+Z">Leandro Z. Agudelo</a>, <a href="/search/cs?searchtype=author&query=Qian%2C+P">Peter Qian</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+T">Tianlong Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.07942v1-abstract-short" style="display: inline;"> Web browsing agents powered by large language models (LLMs) have shown tremendous potential in automating complex web-based tasks. Existing approaches typically rely on large LLMs (e.g., GPT-4o) to explore web environments and generate trajectory data, which is then used either for demonstration retrieval (for large LLMs) or to distill small LLMs (e.g., Llama3) in a process that remains decoupled… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07942v1-abstract-full').style.display = 'inline'; document.getElementById('2502.07942v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.07942v1-abstract-full" style="display: none;"> Web browsing agents powered by large language models (LLMs) have shown tremendous potential in automating complex web-based tasks. Existing approaches typically rely on large LLMs (e.g., GPT-4o) to explore web environments and generate trajectory data, which is then used either for demonstration retrieval (for large LLMs) or to distill small LLMs (e.g., Llama3) in a process that remains decoupled from the exploration. In this paper, we propose AgentSymbiotic, an iterative framework that couples data synthesis with task-performance, yielding a "symbiotic improvement" for both large and small LLMs. Our study uncovers a complementary dynamic between LLM types: while large LLMs excel at generating high-quality trajectories for distillation, the distilled small LLMs-owing to their distinct reasoning capabilities-often choose actions that diverge from those of their larger counterparts. 
This divergence drives the exploration of novel trajectories, thereby enriching the synthesized data. However, we also observe that the performance of small LLMs becomes a bottleneck in this iterative enhancement process. To address this, we propose two innovations in LLM distillation: a speculative data synthesis strategy that mitigates off-policy bias, and a multi-task learning approach designed to boost the reasoning capabilities of the student LLM. Furthermore, we introduce a Hybrid Mode for Privacy Preservation to address user privacy concerns. Evaluated on the WEBARENA benchmark, AgentSymbiotic achieves SOTA performance with both LLM types. Our best Large LLM agent reaches 52%, surpassing the previous best of 45%, while our 8B distilled model demonstrates a competitive 49%, exceeding the prior best of 28%. Code will be released upon acceptance. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.07579">arXiv:2502.07579</a> <span> [<a href="https://arxiv.org/pdf/2502.07579">pdf</a>, <a href="https://arxiv.org/format/2502.07579">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Single-Step Consistent Diffusion Samplers </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jutras-Dub%C3%A9%2C+P">Pascal Jutras-Dubé</a>, <a href="/search/cs?searchtype=author&query=Pynadath%2C+P">Patrick Pynadath</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Ruqi Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2502.07579v1-abstract-full" style="display: inline;"> Sampling from unnormalized target distributions is a fundamental yet challenging task in machine learning and statistics.
Existing sampling algorithms typically require many iterative steps to produce high-quality samples, leading to high computational costs that limit their practicality in time-sensitive or resource-constrained settings. In this work, we introduce consistent diffusion samplers, a new class of samplers designed to generate high-fidelity samples in a single step. We first develop a distillation algorithm to train a consistent diffusion sampler from a pretrained diffusion model without pre-collecting large datasets of samples. Our algorithm leverages incomplete sampling trajectories and noisy intermediate states directly from the diffusion process. We further propose a method to train a consistent diffusion sampler from scratch, fully amortizing exploration by training a single model that both performs diffusion sampling and skips intermediate steps using a self-consistency loss. Through extensive experiments on a variety of unnormalized distributions, we show that our approach yields high-fidelity samples using less than 1% of the network evaluations required by traditional diffusion samplers. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07579v1-abstract-full').style.display = 'none'; document.getElementById('2502.07579v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.07302">arXiv:2502.07302</a> <span> [<a href="https://arxiv.org/pdf/2502.07302">pdf</a>, <a href="https://arxiv.org/format/2502.07302">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> CASC-AI: Consensus-aware Self-corrective AI Agents for Noise Cell Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Deng%2C+R">Ruining Deng</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Y">Yihe Yang</a>, <a href="/search/cs?searchtype=author&query=Pisapia%2C+D+J">David J. Pisapia</a>, <a href="/search/cs?searchtype=author&query=Liechty%2C+B">Benjamin Liechty</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+J">Junchao Zhu</a>, <a href="/search/cs?searchtype=author&query=Xiong%2C+J">Juming Xiong</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+J">Junlin Guo</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+Z">Zhengyi Lu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jiacheng Wang</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xing Yao</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+R">Runxuan Yu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Rendong Zhang</a>, <a href="/search/cs?searchtype=author&query=Rudravaram%2C+G">Gaurav Rudravaram</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+M">Mengmeng Yin</a>, <a href="/search/cs?searchtype=author&query=Sarder%2C+P">Pinaki Sarder</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+H">Haichun Yang</a>, <a href="/search/cs?searchtype=author&query=Huo%2C+Y">Yuankai Huo</a>, <a href="/search/cs?searchtype=author&query=Sabuncu%2C+M+R">Mert R. 
Sabuncu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.07302v1-abstract-short" style="display: inline;"> Multi-class cell segmentation in high-resolution gigapixel whole slide images (WSI) is crucial for various clinical applications. However, training such models typically requires labor-intensive, pixel-wise annotations by domain experts. Recent efforts have democratized this process by involving lay annotators without medical expertise. However, conventional non-agent-based approaches struggle to… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07302v1-abstract-full').style.display = 'inline'; document.getElementById('2502.07302v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.07302v1-abstract-full" style="display: none;"> Multi-class cell segmentation in high-resolution gigapixel whole slide images (WSI) is crucial for various clinical applications. However, training such models typically requires labor-intensive, pixel-wise annotations by domain experts. Recent efforts have democratized this process by involving lay annotators without medical expertise. However, conventional non-agent-based approaches struggle to handle annotation noise adaptively, as they lack mechanisms to mitigate false positives (FP) and false negatives (FN) at both the image-feature and pixel levels. In this paper, we propose a consensus-aware self-corrective AI agent that leverages the Consensus Matrix to guide its learning process. The Consensus Matrix defines regions where both the AI and annotators agree on cell and non-cell annotations, which are prioritized with stronger supervision. Conversely, areas of disagreement are adaptively weighted based on their feature similarity to high-confidence agreement regions, with more similar regions receiving greater attention. Additionally, contrastive learning is employed to separate features of noisy regions from those of reliable agreement regions by maximizing their dissimilarity. This paradigm enables the AI to iteratively refine noisy labels, enhancing its robustness. Validated on one real-world lay-annotated cell dataset and two simulated noisy datasets, our method demonstrates improved segmentation performance, effectively correcting FP and FN errors and showcasing its potential for training robust models on noisy datasets. The official implementation and cell annotations are publicly available at https://github.com/ddrrnn123/CASC-AI. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07302v1-abstract-full').style.display = 'none'; document.getElementById('2502.07302v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
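<p> One way to picture the consensus-guided supervision described above: give full loss weight where the model and the noisy annotation agree, and down-weight disagreement pixels by their feature similarity to the agreement regions. The sketch below implements only that weighting, with invented shapes and random data; the actual CASC-AI agent additionally uses contrastive learning and iterative label refinement. </p> <pre><code>
# Illustrative sketch of a consensus-weighted loss for noisy cell masks.
# Shapes and data are invented for illustration; this is not the paper's code.
import numpy as np

def consensus_weights(pred_mask, noisy_mask, features, eps=1e-8):
    """pred_mask, noisy_mask: (H, W) binary arrays; features: (H, W, C).
    Assumes at least one pixel where prediction and annotation agree."""
    agree = (pred_mask == noisy_mask)                      # consensus matrix
    proto = features[agree].mean(axis=0)                   # prototype of trusted pixels
    f = features.reshape(-1, features.shape[-1])
    cos = f @ proto / (np.linalg.norm(f, axis=1) * np.linalg.norm(proto) + eps)
    sim = np.clip(cos, 0.0, 1.0).reshape(pred_mask.shape)
    # full weight on agreement pixels, similarity-scaled weight elsewhere
    return np.where(agree, 1.0, sim)

def weighted_bce(prob, noisy_mask, weights, eps=1e-8):
    ce = -(noisy_mask * np.log(prob + eps) + (1 - noisy_mask) * np.log(1 - prob + eps))
    return float((weights * ce).mean())

H, W, C = 32, 32, 8
rng = np.random.default_rng(0)
prob = rng.uniform(0.01, 0.99, (H, W))        # model's foreground probabilities
pred = (prob > 0.5).astype(int)
noisy = rng.integers(0, 2, (H, W))            # lay-annotator (noisy) mask
feats = rng.normal(size=(H, W, C))            # per-pixel features
w = consensus_weights(pred, noisy, feats)
print("consensus-weighted loss:", weighted_bce(prob, noisy, w))
</code></pre>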
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.06589">arXiv:2502.06589</a> <span> [<a href="https://arxiv.org/pdf/2502.06589">pdf</a>, <a href="https://arxiv.org/format/2502.06589">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Hephaestus: Improving Fundamental Agent Capabilities of Large Language Models through Continual Pre-Training </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhuang%2C+Y">Yuchen Zhuang</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+J">Jingfeng Yang</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+H">Haoming Jiang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xin Liu</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+K">Kewei Cheng</a>, <a href="/search/cs?searchtype=author&query=Lokegaonkar%2C+S">Sanket Lokegaonkar</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+Y">Yifan Gao</a>, <a href="/search/cs?searchtype=author&query=Ping%2C+Q">Qing Ping</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+T">Tianyi Liu</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+B">Binxuan Huang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zheng Li</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zhengyang Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+P">Pei Chen</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+R">Ruijie Wang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Rongzhi Zhang</a>, <a href="/search/cs?searchtype=author&query=Zalmout%2C+N">Nasser Zalmout</a>, <a href="/search/cs?searchtype=author&query=Nigam%2C+P">Priyanka Nigam</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+B">Bing Yin</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+C">Chao Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.06589v1-abstract-short" style="display: inline;"> Due to the scarcity of agent-oriented pre-training data, LLM-based autonomous agents typically rely on complex prompting or extensive fine-tuning, which often fails to introduce new capabilities while preserving strong generalizability. We introduce Hephaestus-Forge, the first large-scale pre-training corpus designed to enhance the fundamental capabilities of LLM agents in API function calling, in… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06589v1-abstract-full').style.display = 'inline'; document.getElementById('2502.06589v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.06589v1-abstract-full" style="display: none;"> Due to the scarcity of agent-oriented pre-training data, LLM-based autonomous agents typically rely on complex prompting or extensive fine-tuning, which often fails to introduce new capabilities while preserving strong generalizability. 
We introduce Hephaestus-Forge, the first large-scale pre-training corpus designed to enhance the fundamental capabilities of LLM agents in API function calling, intrinsic reasoning and planning, and adapting to environmental feedback. Hephaestus-Forge comprises 103B agent-specific data encompassing 76,537 APIs, including both tool documentation to introduce knowledge of API functions and function calling trajectories to strengthen intrinsic reasoning. To explore effective training protocols, we investigate scaling laws to identify the optimal recipe in data mixing ratios. By continual pre-training on Hephaestus-Forge, Hephaestus outperforms small- to medium-scale open-source LLMs and rivals commercial LLMs on three agent benchmarks, demonstrating the effectiveness of our pre-training corpus in enhancing fundamental agentic capabilities and generalization of LLMs to new tasks or environments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06589v1-abstract-full').style.display = 'none'; document.getElementById('2502.06589v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to NAACL 2025 main conference</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.06269">arXiv:2502.06269</a> <span> [<a href="https://arxiv.org/pdf/2502.06269">pdf</a>, <a href="https://arxiv.org/format/2502.06269">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Progressive Collaborative and Semantic Knowledge Fusion for Generative Recommendation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xiao%2C+L">Longtao Xiao</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Haozhao Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+C">Cheng Wang</a>, <a href="/search/cs?searchtype=author&query=Ji%2C+L">Linfei Ji</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yifan Wang</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+J">Jieming Zhu</a>, <a href="/search/cs?searchtype=author&query=Dong%2C+Z">Zhenhua Dong</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Rui Zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+R">Ruixuan Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.06269v1-abstract-short" style="display: inline;"> With the recent surge in interest surrounding generative paradigms, generative recommendation has increasingly attracted the attention of researchers in the recommendation community. This paradigm generally consists of two stages. 
In the first stage, pretrained semantic embeddings or collaborative ID embeddings are quantized to create item codes, aiming to capture and preserve rich semantic or col… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06269v1-abstract-full').style.display = 'inline'; document.getElementById('2502.06269v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.06269v1-abstract-full" style="display: none;"> With the recent surge in interest surrounding generative paradigms, generative recommendation has increasingly attracted the attention of researchers in the recommendation community. This paradigm generally consists of two stages. In the first stage, pretrained semantic embeddings or collaborative ID embeddings are quantized to create item codes, aiming to capture and preserve rich semantic or collaborative knowledge within these codes. The second stage involves utilizing these discrete codes to perform an autoregressive sequence generation task. Existing methods often either overlook collaborative or semantic knowledge, or combine the two roughly. In this paper, we observe that naively concatenating representations from semantic and collaborative modality leads to a semantic domination issue, where the resulting representation is overly influenced by semantic information, effectively overshadowing the collaborative representation. Consequently, downstream recommendation tasks fail to fully exploit the knowledge from both modalities, resulting in suboptimal performance. To address this, we propose a progressive collaborative and semantic knowledge fusion model for generative recommendation, named PRORec, which integrates semantic and collaborative knowledge with a unified code through a two-stage framework. Specifically, in the first stage, we propose a cross-modality knowledge alignment task, which integrates semantic knowledge into collaborative embeddings, enhancing their representational capability. In the second stage, we propose an in-modality knowledge distillation task, designed to effectively capture and integrate knowledge from both semantic and collaborative modalities. Extensive experiments on three widely used benchmarks validate the effectiveness of our approach, demonstrating its superiority compared to existing methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06269v1-abstract-full').style.display = 'none'; document.getElementById('2502.06269v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.06075">arXiv:2502.06075</a> <span> [<a href="https://arxiv.org/pdf/2502.06075">pdf</a>, <a href="https://arxiv.org/format/2502.06075">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> Deconstructing Depression Stigma: Integrating AI-driven Data Collection and Analysis with Causal Knowledge Graphs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Meng%2C+H">Han Meng</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Renwen Zhang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+G">Ganyi Wang</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Y">Yitian Yang</a>, <a href="/search/cs?searchtype=author&query=Qin%2C+P">Peinuan Qin</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+J">Jungup Lee</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+Y">Yi-Chieh Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.06075v1-abstract-short" style="display: inline;"> Mental-illness stigma is a persistent social problem, hampering both treatment-seeking and recovery. Accordingly, there is a pressing need to understand it more clearly, but analyzing the relevant data is highly labor-intensive. Therefore, we designed a chatbot to engage participants in conversations; coded those conversations qualitatively with AI assistance; and, based on those coding results, b… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06075v1-abstract-full').style.display = 'inline'; document.getElementById('2502.06075v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.06075v1-abstract-full" style="display: none;"> Mental-illness stigma is a persistent social problem, hampering both treatment-seeking and recovery. Accordingly, there is a pressing need to understand it more clearly, but analyzing the relevant data is highly labor-intensive. Therefore, we designed a chatbot to engage participants in conversations; coded those conversations qualitatively with AI assistance; and, based on those coding results, built causal knowledge graphs to decode stigma. The results we obtained from 1,002 participants demonstrate that conversation with our chatbot can elicit rich information about people's attitudes toward depression, while our AI-assisted coding was strongly consistent with human-expert coding. Our novel approach combining large language models (LLMs) and causal knowledge graphs uncovered patterns in individual responses and illustrated the interrelationships of psychological constructs in the dataset as a whole. The paper also discusses these findings' implications for HCI researchers in developing digital interventions, decomposing human psychological constructs, and fostering inclusive attitudes. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06075v1-abstract-full').style.display = 'none'; document.getElementById('2502.06075v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Conditionally accepted to CHI Conference on Human Factors in Computing Systems (CHI'25)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05425">arXiv:2502.05425</a> <span> [<a href="https://arxiv.org/pdf/2502.05425">pdf</a>, <a href="https://arxiv.org/format/2502.05425">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TITS.2025.3535932">10.1109/TITS.2025.3535932 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Toward Copyright Integrity and Verifiability via Multi-Bit Watermarking for Intelligent Transportation Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yihao Wang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+L">Lingxiao Li</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+Y">Yifan Tang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Ru Zhang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jianyi Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05425v1-abstract-short" style="display: inline;"> Intelligent transportation systems (ITS) use advanced technologies such as artificial intelligence to significantly improve traffic flow management efficiency, and promote the intelligent development of the transportation industry. However, if the data in ITS is attacked, such as tampering or forgery, it will endanger public safety and cause social losses. Therefore, this paper proposes a watermar… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05425v1-abstract-full').style.display = 'inline'; document.getElementById('2502.05425v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05425v1-abstract-full" style="display: none;"> Intelligent transportation systems (ITS) use advanced technologies such as artificial intelligence to significantly improve traffic flow management efficiency, and promote the intelligent development of the transportation industry. However, if the data in ITS is attacked, such as tampering or forgery, it will endanger public safety and cause social losses. 
Therefore, this paper proposes a watermarking scheme that can verify the integrity of copyright in response to the needs of ITS, termed ITSmark. ITSmark focuses on functions such as extracting watermarks, verifying permission, and tracing tampered locations. The scheme uses the copyright information to build the multi-bit space and divides this space into multiple segments. These segments will be assigned to tokens. Thus, the next token is determined by its segment which contains the copyright. In this way, the obtained data contains the custom watermark. To ensure the authorization, key parameters are encrypted during copyright embedding to obtain cipher data. Only by possessing the correct cipher data and private key can the user entirely extract the watermark. Experiments show that ITSmark surpasses baseline performances in data quality, extraction accuracy, and unforgeability. It also shows unique capabilities of permission verification and tampered location tracing, which ensures the security of extraction and the reliability of copyright verification. Furthermore, ITSmark can also customize the watermark embedding position and proportion according to user needs, making embedding more flexible. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05425v1-abstract-full').style.display = 'none'; document.getElementById('2502.05425v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 figures, 10 tables. Accepted for publication in IEEE Transactions on Intelligent Transportation Systems (accepted versions, not the IEEE-published versions). ©2025 IEEE. All rights reserved, including rights for text and data mining, and training of artificial intelligence and similar technologies. Personal use is permitted, but republication/redistribution requires IEEE permission</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE Transactions on Intelligent Transportation Systems, 07 February 2025 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05210">arXiv:2502.05210</a> <span> [<a href="https://arxiv.org/pdf/2502.05210">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Statistical Finance">q-fin.ST</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Regression and Forecasting of U.S.
Stock Returns Based on LSTM </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhou%2C+S">Shicheng Zhou</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zizhou Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Rong Zhang</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+Y">Yuchen Yin</a>, <a href="/search/cs?searchtype=author&query=Chang%2C+C+H">Chia Hong Chang</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+Q">Qinyan Shen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05210v1-abstract-short" style="display: inline;"> This paper analyses the investment returns of three stock sectors, Manuf, Hitec, and Other, in the U.S. stock market, based on the Fama-French three-factor model, the Carhart four-factor model, and the Fama-French five-factor model, in order to test the validity of the Fama-French three-factor model, the Carhart four-factor model, and the Fama-French five-factor model for the three sectors of the… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05210v1-abstract-full').style.display = 'inline'; document.getElementById('2502.05210v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05210v1-abstract-full" style="display: none;"> This paper analyses the investment returns of three stock sectors, Manuf, Hitec, and Other, in the U.S. stock market, based on the Fama-French three-factor model, the Carhart four-factor model, and the Fama-French five-factor model, in order to test the validity of the Fama-French three-factor model, the Carhart four-factor model, and the Fama-French five-factor model for the three sectors of the market. Also, the LSTM model is used to explore the additional factors affecting stock returns. The empirical results show that the Fama-French five-factor model has better validity for the three segments of the market under study, and the LSTM model has the ability to capture the factors affecting the returns of certain industries, and can better regress and predict the stock returns of the relevant industries. Keywords: Fama-French model; Carhart model; Factor model; LSTM model. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05210v1-abstract-full').style.display = 'none'; document.getElementById('2502.05210v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.04804">arXiv:2502.04804</a> <span> [<a href="https://arxiv.org/pdf/2502.04804">pdf</a>, <a href="https://arxiv.org/format/2502.04804">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> DetVPCC: RoI-based Point Cloud Sequence Compression for 3D Object Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yan%2C+M">Mingxuan Yan</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Ruijie Zhang</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+X">Xuedou Xiao</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+W">Wei Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.04804v1-abstract-short" style="display: inline;"> While MPEG-standardized video-based point cloud compression (VPCC) achieves high compression efficiency for human perception, it struggles with a poor trade-off between bitrate savings and detection accuracy when supporting 3D object detectors. This limitation stems from VPCC's inability to prioritize regions of different importance within point clouds. To address this issue, we propose DetVPCC, a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04804v1-abstract-full').style.display = 'inline'; document.getElementById('2502.04804v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.04804v1-abstract-full" style="display: none;"> While MPEG-standardized video-based point cloud compression (VPCC) achieves high compression efficiency for human perception, it struggles with a poor trade-off between bitrate savings and detection accuracy when supporting 3D object detectors. This limitation stems from VPCC's inability to prioritize regions of different importance within point clouds. To address this issue, we propose DetVPCC, a novel method integrating region-of-interest (RoI) encoding with VPCC for efficient point cloud sequence compression while preserving the 3D object detection accuracy. Specifically, we augment VPCC to support RoI-based compression by assigning spatially non-uniform quality levels. Then, we introduce a lightweight RoI detector to identify crucial regions that potentially contain objects. Experiments on the nuScenes dataset demonstrate that our approach significantly improves the detection accuracy. The code and demo video are available in supplementary materials. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04804v1-abstract-full').style.display = 'none'; document.getElementById('2502.04804v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025.
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.04111">arXiv:2502.04111</a> <span> [<a href="https://arxiv.org/pdf/2502.04111">pdf</a>, <a href="https://arxiv.org/format/2502.04111">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/ICME57554.2024.10688017">10.1109/ICME57554.2024.10688017 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Adaptive Margin Contrastive Learning for Ambiguity-aware 3D Semantic Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yang Chen</a>, <a href="/search/cs?searchtype=author&query=Duan%2C+Y">Yueqi Duan</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Runzhong Zhang</a>, <a href="/search/cs?searchtype=author&query=Tan%2C+Y">Yap-Peng Tan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.04111v1-abstract-short" style="display: inline;"> In this paper, we propose an adaptive margin contrastive learning method for 3D point cloud semantic segmentation, namely AMContrast3D. Most existing methods use equally penalized objectives, which ignore per-point ambiguities and less discriminated features stemming from transition regions. However, as highly ambiguous points may be indistinguishable even for humans, their manually annotated labe… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04111v1-abstract-full').style.display = 'inline'; document.getElementById('2502.04111v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.04111v1-abstract-full" style="display: none;"> In this paper, we propose an adaptive margin contrastive learning method for 3D point cloud semantic segmentation, namely AMContrast3D. Most existing methods use equally penalized objectives, which ignore per-point ambiguities and less discriminated features stemming from transition regions. However, as highly ambiguous points may be indistinguishable even for humans, their manually annotated labels are less reliable, and hard constraints over these points would lead to sub-optimal models. To address this, we design adaptive objectives for individual points based on their ambiguity levels, aiming to ensure the correctness of low-ambiguity points while allowing mistakes for high-ambiguity points. Specifically, we first estimate ambiguities based on position embeddings. Then, we develop a margin generator to shift decision boundaries for contrastive feature embeddings, so margins are narrowed due to increasing ambiguities with even negative margins for extremely high-ambiguity points. Experimental results on large-scale datasets, S3DIS and ScanNet, demonstrate that our method outperforms state-of-the-art methods. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04111v1-abstract-full').style.display = 'none'; document.getElementById('2502.04111v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> 2024 IEEE International Conference on Multimedia and Expo (ICME) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.03685">arXiv:2502.03685</a> <span> [<a href="https://arxiv.org/pdf/2502.03685">pdf</a>, <a href="https://arxiv.org/format/2502.03685">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Controlled LLM Decoding via Discrete Auto-regressive Biasing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Pynadath%2C+P">Patrick Pynadath</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Ruqi Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.03685v1-abstract-short" style="display: inline;"> Controlled text generation allows for enforcing user-defined constraints on large language model outputs, an increasingly important field as LLMs become more prevalent in everyday life. One common approach uses energy-based decoding, which defines a target distribution through an energy function that combines multiple constraints into a weighted average. However, these methods often struggle to ba… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03685v1-abstract-full').style.display = 'inline'; document.getElementById('2502.03685v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.03685v1-abstract-full" style="display: none;"> Controlled text generation allows for enforcing user-defined constraints on large language model outputs, an increasingly important field as LLMs become more prevalent in everyday life. One common approach uses energy-based decoding, which defines a target distribution through an energy function that combines multiple constraints into a weighted average. However, these methods often struggle to balance fluency with constraint satisfaction, even with extensive tuning of the energy function's coefficients. In this paper, we identify that this suboptimal balance arises from sampling in continuous space rather than the natural discrete space of text tokens. To address this, we propose Discrete Auto-regressive Biasing, a controlled decoding algorithm that leverages gradients while operating entirely in the discrete text domain. 
Specifically, we introduce a new formulation for controlled text generation by defining a joint distribution over the generated sequence and an auxiliary bias sequence. To efficiently sample from this joint distribution, we propose a Langevin-within-Gibbs sampling algorithm using gradient-based discrete MCMC. Our method significantly improves constraint satisfaction while maintaining comparable or better fluency, all with even lower computational costs. We demonstrate the advantages of our controlled decoding method on sentiment control, language detoxification, and keyword-guided generation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03685v1-abstract-full').style.display = 'none'; document.getElementById('2502.03685v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.03506">arXiv:2502.03506</a> <span> [<a href="https://arxiv.org/pdf/2502.03506">pdf</a>, <a href="https://arxiv.org/format/2502.03506">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Optimistic ε-Greedy Exploration for Cooperative Multi-Agent Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Ruoning Zhang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Siying Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+W">Wenyu Chen</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Y">Yang Zhou</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+Z">Zhitong Zhao</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zixuan Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Ruijie Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.03506v1-abstract-short" style="display: inline;"> The Centralized Training with Decentralized Execution (CTDE) paradigm is widely used in cooperative multi-agent reinforcement learning. However, due to the representational limitations of traditional monotonic value decomposition methods, algorithms can underestimate optimal actions, leading policies to suboptimal solutions. To address this challenge, we propose Optimistic $ε$-Greedy Exploration,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03506v1-abstract-full').style.display = 'inline'; document.getElementById('2502.03506v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.03506v1-abstract-full" style="display: none;"> The Centralized Training with Decentralized Execution (CTDE) paradigm is widely used in cooperative multi-agent reinforcement learning.
However, due to the representational limitations of traditional monotonic value decomposition methods, algorithms can underestimate optimal actions, leading policies to suboptimal solutions. To address this challenge, we propose Optimistic $ε$-Greedy Exploration, focusing on enhancing exploration to correct value estimations. The underestimation arises from insufficient sampling of optimal actions during exploration, as our analysis indicated. We introduce an optimistic updating network to identify optimal actions and sample actions from its distribution with a probability of $ε$ during exploration, increasing the selection frequency of optimal actions. Experimental results in various environments reveal that the Optimistic $ε$-Greedy Exploration effectively prevents the algorithm from suboptimal solutions and significantly improves its performance compared to other algorithms. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03506v1-abstract-full').style.display = 'none'; document.getElementById('2502.03506v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.03125">arXiv:2502.03125</a> <span> [<a href="https://arxiv.org/pdf/2502.03125">pdf</a>, <a href="https://arxiv.org/format/2502.03125">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Double Distillation Network for Multi-Agent Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhou%2C+Y">Yang Zhou</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Siying Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+W">Wenyu Chen</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Ruoning Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+Z">Zhitong Zhao</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zixuan Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.03125v1-abstract-short" style="display: inline;"> Multi-agent reinforcement learning typically employs a centralized training-decentralized execution (CTDE) framework to alleviate the non-stationarity in environment. However, the partial observability during execution may lead to cumulative gap errors gathered by agents, impairing the training of effective collaborative policies.
To overcome this challenge, we introduce the Double Distillation Ne… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03125v1-abstract-full').style.display = 'inline'; document.getElementById('2502.03125v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.03125v1-abstract-full" style="display: none;"> Multi-agent reinforcement learning typically employs a centralized training-decentralized execution (CTDE) framework to alleviate the non-stationarity in environment. However, the partial observability during execution may lead to cumulative gap errors gathered by agents, impairing the training of effective collaborative policies. To overcome this challenge, we introduce the Double Distillation Network (DDN), which incorporates two distillation modules aimed at enhancing robust coordination and facilitating the collaboration process under constrained information. The external distillation module uses a global guiding network and a local policy network, employing distillation to reconcile the gap between global training and local execution. In addition, the internal distillation module introduces intrinsic rewards, drawn from state information, to enhance the exploration capabilities of agents. Extensive experiments demonstrate that DDN significantly improves performance across multiple scenarios. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03125v1-abstract-full').style.display = 'none'; document.getElementById('2502.03125v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.02911">arXiv:2502.02911</a> <span> [<a href="https://arxiv.org/pdf/2502.02911">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> The Benefits of Prosociality towards AI Agents: Examining the Effects of Helping AI Agents on Human Well-Being </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhu%2C+Z">Zicheng Zhu</a>, <a href="/search/cs?searchtype=author&query=Tan%2C+Y">Yugin Tan</a>, <a href="/search/cs?searchtype=author&query=Yamashita%2C+N">Naomi Yamashita</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+Y">Yi-Chieh Lee</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Renwen Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.02911v1-abstract-short" style="display: inline;"> Prosocial behaviors, such as helping others, are well-known to enhance human well-being. While there is a growing trend of humans helping AI agents, it remains unclear whether the well-being benefits of helping others extend to interactions with non-human entities. 
To address this, we conducted an experiment (N = 295) to explore how helping AI agents impacts human well-being, especially when the a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02911v1-abstract-full').style.display = 'inline'; document.getElementById('2502.02911v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.02911v1-abstract-full" style="display: none;"> Prosocial behaviors, such as helping others, are well-known to enhance human well-being. While there is a growing trend of humans helping AI agents, it remains unclear whether the well-being benefits of helping others extend to interactions with non-human entities. To address this, we conducted an experiment (N = 295) to explore how helping AI agents impacts human well-being, especially when the agents fulfill human basic psychological needs--relatedness, competence, and autonomy--during the interaction. Our findings showed that helping AI agents reduced participants' feelings of loneliness. When AI met participants' needs for competence and autonomy during the helping process, there was a further decrease in loneliness and an increase in positive affect. However, when AI did not meet participants' need for relatedness, participants experienced an increase in positive affect. We discuss the implications of these findings for understanding how AI can support human well-being. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02911v1-abstract-full').style.display = 'none'; document.getElementById('2502.02911v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.02875">arXiv:2502.02875</a> <span> [<a href="https://arxiv.org/pdf/2502.02875">pdf</a>, <a href="https://arxiv.org/format/2502.02875">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> </div> </div> <p class="title is-5 mathjax"> Heterogeneous Value Decomposition Policy Fusion for Multi-Agent Cooperation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+S">Siying Wang</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Y">Yang Zhou</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+Z">Zhitong Zhao</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Ruoning Zhang</a>, <a href="/search/cs?searchtype=author&query=Shao%2C+J">Jinliang Shao</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+W">Wenyu Chen</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+Y">Yuhua Cheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.02875v1-abstract-short" style="display: inline;"> Value decomposition (VD) has become one of the most prominent solutions in cooperative multi-agent reinforcement learning. 
Most existing methods generally explore how to factorize the joint value and minimize the discrepancies between agent observations and characteristics of environmental states. However, direct decomposition may result in limited representation or difficulty in optimization. Ort… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02875v1-abstract-full').style.display = 'inline'; document.getElementById('2502.02875v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.02875v1-abstract-full" style="display: none;"> Value decomposition (VD) has become one of the most prominent solutions in cooperative multi-agent reinforcement learning. Most existing methods generally explore how to factorize the joint value and minimize the discrepancies between agent observations and characteristics of environmental states. However, direct decomposition may result in limited representation or difficulty in optimization. Orthogonal to designing a new factorization scheme, in this paper, we propose Heterogeneous Policy Fusion (HPF) to integrate the strengths of various VD methods. We construct a composite policy set to select policies for interaction adaptively. Specifically, this adaptive mechanism allows agents' trajectories to benefit from diverse policy transitions while incorporating the advantages of each factorization method. Additionally, HPF introduces a constraint between these heterogeneous policies to rectify the misleading update caused by the unexpected exploratory or suboptimal non-cooperation. Experimental results on cooperative tasks show HPF's superior performance over multiple baselines, proving its effectiveness and ease of implementation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02875v1-abstract-full').style.display = 'none'; document.getElementById('2502.02875v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Zhang%2C+R&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Zhang%2C+R&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Zhang%2C+R&start=50" class="pagination-link " aria-label="Page 2">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Zhang%2C+R&start=100" class="pagination-link " aria-label="Page 3">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Zhang%2C+R&start=150" class="pagination-link " aria-label="Page 4">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Zhang%2C+R&start=200" class="pagination-link " aria-label="Page 5">5 </a> </li> <li><span class="pagination-ellipsis">…</span></li> </ul> </nav> </div> </main> </body> </html>