Search | arXiv e-print repository
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 818 results for author: <span class="mathjax">Yan, Y</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Yan%2C+Y">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Yan, Y"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Yan%2C+Y&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Yan, Y"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Yan%2C+Y&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Yan%2C+Y&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Yan%2C+Y&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Yan%2C+Y&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Yan%2C+Y&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Yan%2C+Y&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">…</span></li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.17436">arXiv:2502.17436</a> <span> [<a href="https://arxiv.org/pdf/2502.17436">pdf</a>, <a href="https://arxiv.org/format/2502.17436">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Towards Hierarchical Rectified Flow </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yichi Zhang</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+Y">Yici Yan</a>, <a href="/search/cs?searchtype=author&query=Schwing%2C+A">Alex Schwing</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+Z">Zhizhen Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.17436v1-abstract-short" style="display: inline;"> We formulate a hierarchical rectified flow to model data distributions. It hierarchically couples multiple ordinary differential equations (ODEs) and defines a time-differentiable stochastic process that generates a data distribution from a known source distribution. 
Each ODE resembles the ODE that is solved in a classic rectified flow, but differs in its domain, i.e., location, velocity, accelera… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.17436v1-abstract-full').style.display = 'inline'; document.getElementById('2502.17436v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.17436v1-abstract-full" style="display: none;"> We formulate a hierarchical rectified flow to model data distributions. It hierarchically couples multiple ordinary differential equations (ODEs) and defines a time-differentiable stochastic process that generates a data distribution from a known source distribution. Each ODE resembles the ODE that is solved in a classic rectified flow, but differs in its domain, i.e., location, velocity, acceleration, etc. Unlike the classic rectified flow formulation, which formulates a single ODE in the location domain and only captures the expected velocity field (sufficient to capture a multi-modal data distribution), the hierarchical rectified flow formulation models the multi-modal random velocity field, acceleration field, etc., in their entirety. This more faithful modeling of the random velocity field enables integration paths to intersect when the underlying ODE is solved during data generation. Intersecting paths in turn lead to integration trajectories that are more straight than those obtained in the classic rectified flow formulation, where integration paths cannot intersect. This leads to modeling of data distributions with fewer neural function evaluations. We empirically verify this on synthetic 1D and 2D data as well as MNIST, CIFAR-10, and ImageNet-32 data. Code is available at: https://riccizz.github.io/HRF/. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.17436v1-abstract-full').style.display = 'none'; document.getElementById('2502.17436v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICLR 2025; Project Page: https://riccizz.github.io/HRF/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.17315">arXiv:2502.17315</a> <span> [<a href="https://arxiv.org/pdf/2502.17315">pdf</a>, <a href="https://arxiv.org/format/2502.17315">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> HIPPO: Enhancing the Table Understanding Capability of Large Language Models through Hybrid-Modal Preference Optimization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zhenghao Liu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Haolan Wang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xinze Li</a>, <a href="/search/cs?searchtype=author&query=Xiong%2C+Q">Qiushi Xiong</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+X">Xiaocui Yang</a>, <a href="/search/cs?searchtype=author&query=Gu%2C+Y">Yu Gu</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+Y">Yukun Yan</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+Q">Qi Shi</a>, <a href="/search/cs?searchtype=author&query=Li%2C+F">Fangfang Li</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+G">Ge Yu</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+M">Maosong Sun</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.17315v1-abstract-short" style="display: inline;"> Tabular data contains rich structural semantics and plays a crucial role in organizing and manipulating information. To better capture these structural semantics, this paper introduces the HybrId-modal Preference oPtimizatiOn (HIPPO) model, which represents tables using both text and image, and optimizes MLLMs to effectively learn more comprehensive table information from these multiple modalities… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.17315v1-abstract-full').style.display = 'inline'; document.getElementById('2502.17315v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.17315v1-abstract-full" style="display: none;"> Tabular data contains rich structural semantics and plays a crucial role in organizing and manipulating information. To better capture these structural semantics, this paper introduces the HybrId-modal Preference oPtimizatiOn (HIPPO) model, which represents tables using both text and image, and optimizes MLLMs to effectively learn more comprehensive table information from these multiple modalities. Specifically, HIPPO samples model responses from hybrid-modal table representations and designs a modality-consistent sampling strategy to enhance response diversity and mitigate modality bias during DPO training. Experimental results on table question answering and table fact verification tasks demonstrate the effectiveness of HIPPO, achieving a 4% improvement over various table reasoning models. 
2. arXiv:2502.17315 [pdf, other] (cs.CL)
HIPPO: Enhancing the Table Understanding Capability of Large Language Models through Hybrid-Modal Preference Optimization
Authors: Zhenghao Liu, Haolan Wang, Xinze Li, Qiushi Xiong, Xiaocui Yang, Yu Gu, Yukun Yan, Qi Shi, Fangfang Li, Ge Yu, Maosong Sun
Abstract: Tabular data contains rich structural semantics and plays a crucial role in organizing and manipulating information. To better capture these structural semantics, this paper introduces the HybrId-modal Preference oPtimizatiOn (HIPPO) model, which represents tables using both text and image, and optimizes MLLMs to effectively learn more comprehensive table information from these multiple modalities. Specifically, HIPPO samples model responses from hybrid-modal table representations and designs a modality-consistent sampling strategy to enhance response diversity and mitigate modality bias during DPO training. Experimental results on table question answering and table fact verification tasks demonstrate the effectiveness of HIPPO, achieving a 4% improvement over various table reasoning models. Further analysis reveals that HIPPO not only enhances reasoning abilities based on unimodal table representations but also facilitates the extraction of crucial and distinct semantics from different modal representations. All data and codes are available at https://github.com/NEUIR/HIPPO.
Submitted 24 February, 2025; originally announced February 2025.
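The DPO training mentioned above refers to Direct Preference Optimization. As a reference point, the standard DPO loss over a (chosen, rejected) response pair is sketched below; HIPPO's contribution is how the pairs are sampled from hybrid-modal table representations, which is not shown here.

```python
import torch
import torch.nn.functional as F

def dpo_loss(logp_chosen, logp_rejected, ref_logp_chosen, ref_logp_rejected, beta=0.1):
    """Standard DPO objective over sequence log-probs under the policy and a frozen reference."""
    margin = beta * ((logp_chosen - ref_logp_chosen) - (logp_rejected - ref_logp_rejected))
    return -F.logsigmoid(margin).mean()

# toy usage with made-up sequence log-probabilities
loss = dpo_loss(torch.tensor([-12.3]), torch.tensor([-15.1]),
                torch.tensor([-13.0]), torch.tensor([-14.2]))
```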
3. arXiv:2502.17297 [pdf, other] (cs.AI)
Benchmarking Retrieval-Augmented Generation in Multi-Modal Contexts
Authors: Zhenghao Liu, Xingsheng Zhu, Tianshuo Zhou, Xinyi Zhang, Xiaoyuan Yi, Yukun Yan, Yu Gu, Ge Yu, Maosong Sun
Abstract: This paper introduces Multi-Modal Retrieval-Augmented Generation (M^2RAG), a benchmark designed to evaluate the effectiveness of Multi-modal Large Language Models (MLLMs) in leveraging knowledge from multi-modal retrieval documents. The benchmark comprises four tasks: image captioning, multi-modal question answering, multi-modal fact verification, and image reranking. All tasks are set in an open-domain setting, requiring RAG models to retrieve query-relevant information from a multi-modal document collection and use it as input context for RAG modeling. To enhance the context utilization capabilities of MLLMs, we also introduce Multi-Modal Retrieval-Augmented Instruction Tuning (MM-RAIT), an instruction tuning method that optimizes MLLMs within multi-modal contexts. Our experiments show that MM-RAIT improves the performance of RAG systems by enabling them to effectively learn from multi-modal contexts. All data and code are available at https://github.com/NEUIR/M2RAG.
Submitted 24 February, 2025; originally announced February 2025.
4. arXiv:2502.17057 [pdf, other] (cs.IR, cs.AI)
LLM-QE: Improving Query Expansion by Aligning Large Language Models with Ranking Preferences
Authors: Sijia Yao, Pengcheng Huang, Zhenghao Liu, Yu Gu, Yukun Yan, Shi Yu, Ge Yu
Abstract: Query expansion plays a crucial role in information retrieval, aiming to bridge the semantic gap between queries and documents to improve matching performance. This paper introduces LLM-QE, a novel approach that leverages Large Language Models (LLMs) to generate document-based query expansions, thereby enhancing dense retrieval models. Unlike traditional methods, LLM-QE designs both rank-based and answer-based rewards and uses these reward models to optimize LLMs to align with the ranking preferences of both retrievers and LLMs, thus mitigating the hallucination of LLMs during query expansion. Our experiments on the zero-shot dense retrieval model, Contriever, demonstrate the effectiveness of LLM-QE, achieving an improvement of over 8%. Furthermore, by incorporating answer-based reward modeling, LLM-QE generates more relevant and precise information related to the documents, rather than simply producing redundant tokens to maximize rank-based rewards. Notably, LLM-QE also improves the training process of dense retrievers, achieving a more than 5% improvement after fine-tuning. All codes are available at https://github.com/NEUIR/LLM-QE.
Submitted 24 February, 2025; originally announced February 2025.
Comments: 13 pages, 5 tables, 4 figures
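As background, document-based query expansion of the kind LLM-QE optimizes can be sketched in a few lines. In the sketch below, `generate` is a hypothetical stand-in for any instruction-tuned LLM call; the reward-based alignment that is the paper's actual contribution is omitted.

```python
def expand_query(query: str, generate) -> str:
    """Document-style query expansion: generate a pseudo-document with an LLM,
    then concatenate it with the original query before dense retrieval.
    `generate` is a placeholder for any text-in/text-out LLM call."""
    pseudo_doc = generate(
        f"Write a short passage that could answer the query: {query}"
    )
    return f"{query} {pseudo_doc}"

# the expanded string is what gets encoded by the dense retriever (e.g., Contriever)
```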
5. arXiv:2502.16104 [pdf, other] (cs.LG, cs.CV)
Set a Thief to Catch a Thief: Combating Label Noise through Noisy Meta Learning
Authors: Hanxuan Wang, Na Lu, Xueying Zhao, Yuxuan Yan, Kaipeng Ma, Kwoh Chee Keong, Gustavo Carneiro
Abstract: Learning from noisy labels (LNL) aims to train high-performance deep models using noisy datasets. Meta learning based label correction methods have demonstrated remarkable performance in LNL by designing various meta label rectification tasks. However, an extra clean validation set is a prerequisite for these methods to perform label correction, requiring extra labor and greatly limiting their practicality. To tackle this issue, we propose a novel noisy meta label correction framework STCT, which counterintuitively uses noisy data to correct label noise, borrowing the spirit of the saying "Set a Thief to Catch a Thief". The core idea of STCT is to leverage noisy data which is i.i.d. with the training data as a validation set to evaluate model performance and perform label correction in a meta learning framework, eliminating the need for extra clean data. By decoupling the complex bi-level optimization in meta learning into representation learning and label correction, STCT is solved through an alternating training strategy between noisy meta correction and semi-supervised representation learning. Extensive experiments on synthetic and real-world datasets demonstrate the outstanding performance of STCT, particularly in high noise rate scenarios. STCT achieves 96.9% label correction and 95.2% classification performance on CIFAR-10 with 80% symmetric noise, significantly surpassing the current state-of-the-art.
Submitted 22 February, 2025; originally announced February 2025.
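The "80% symmetric noise" setting quoted above follows a standard protocol in the LNL literature. A sketch of one common variant (flip each label to a uniformly random other class) is below; names and the seed are chosen for illustration.

```python
import numpy as np

def add_symmetric_noise(labels: np.ndarray, num_classes: int = 10,
                        rate: float = 0.8, seed: int = 0) -> np.ndarray:
    """Flip each label to a uniformly random *other* class with probability `rate`,
    the usual protocol behind figures like '80% symmetric noise' on CIFAR-10."""
    rng = np.random.default_rng(seed)
    noisy = labels.copy()
    flip = rng.random(len(labels)) < rate                # which samples get corrupted
    offset = rng.integers(1, num_classes, size=len(labels))
    noisy[flip] = (labels[flip] + offset[flip]) % num_classes  # never maps to itself
    return noisy

# e.g., 1,000 balanced CIFAR-10-style labels with 80% symmetric noise
noisy = add_symmetric_noise(np.arange(10).repeat(100))
```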
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.15786">arXiv:2502.15786</a> <span> [<a href="https://arxiv.org/pdf/2502.15786">pdf</a>, <a href="https://arxiv.org/format/2502.15786">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Neurons and Cognition">q-bio.NC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> MindLLM: A Subject-Agnostic and Versatile Model for fMRI-to-Text Decoding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Qiu%2C+W">Weikang Qiu</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Z">Zheng Huang</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+H">Haoyu Hu</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+A">Aosong Feng</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+Y">Yujun Yan</a>, <a href="/search/cs?searchtype=author&query=Ying%2C+R">Rex Ying</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.15786v1-abstract-short" style="display: inline;"> Decoding functional magnetic resonance imaging (fMRI) signals into text has been a key challenge in the neuroscience community, with the potential to advance brain-computer interfaces and uncover deeper insights into brain mechanisms. However, existing approaches often struggle with suboptimal predictive performance, limited task variety, and poor generalization across subjects. In response to thi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.15786v1-abstract-full').style.display = 'inline'; document.getElementById('2502.15786v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.15786v1-abstract-full" style="display: none;"> Decoding functional magnetic resonance imaging (fMRI) signals into text has been a key challenge in the neuroscience community, with the potential to advance brain-computer interfaces and uncover deeper insights into brain mechanisms. However, existing approaches often struggle with suboptimal predictive performance, limited task variety, and poor generalization across subjects. In response to this, we propose MindLLM, a model designed for subject-agnostic and versatile fMRI-to-text decoding. MindLLM consists of an fMRI encoder and an off-the-shelf LLM. The fMRI encoder employs a neuroscience-informed attention mechanism, which is capable of accommodating subjects with varying input shapes and thus achieves high-performance subject-agnostic decoding. Moreover, we introduce Brain Instruction Tuning (BIT), a novel approach that enhances the model's ability to capture diverse semantic representations from fMRI signals, facilitating more versatile decoding. We evaluate MindLLM on comprehensive fMRI-to-text benchmarks. Results demonstrate that our model outperforms the baselines, improving downstream tasks by 12.0%, unseen subject generalization by 16.4%, and novel task adaptation by 25.0%. Furthermore, the attention patterns in MindLLM provide interpretable insights into its decision-making process. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.15786v1-abstract-full').style.display = 'none'; document.getElementById('2502.15786v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages, 9 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.15543">arXiv:2502.15543</a> <span> [<a href="https://arxiv.org/pdf/2502.15543">pdf</a>, <a href="https://arxiv.org/format/2502.15543">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> PIP-KAG: Mitigating Knowledge Conflicts in Knowledge-Augmented Generation via Parametric Pruning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huang%2C+P">Pengcheng Huang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zhenghao Liu</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+Y">Yukun Yan</a>, <a href="/search/cs?searchtype=author&query=Yi%2C+X">Xiaoyuan Yi</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+H">Hao Chen</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zhiyuan Liu</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+M">Maosong Sun</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+T">Tong Xiao</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+G">Ge Yu</a>, <a href="/search/cs?searchtype=author&query=Xiong%2C+C">Chenyan Xiong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.15543v1-abstract-short" style="display: inline;"> Knowledge-Augmented Generation (KAG) has shown great promise in updating the internal memory of Large Language Models (LLMs) by integrating external knowledge. However, KAG inevitably faces knowledge conflicts when the internal memory contradicts external information. Current approaches to mitigating these conflicts mainly focus on improving external knowledge utilization. However, these methods h… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.15543v1-abstract-full').style.display = 'inline'; document.getElementById('2502.15543v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.15543v1-abstract-full" style="display: none;"> Knowledge-Augmented Generation (KAG) has shown great promise in updating the internal memory of Large Language Models (LLMs) by integrating external knowledge. However, KAG inevitably faces knowledge conflicts when the internal memory contradicts external information. Current approaches to mitigating these conflicts mainly focus on improving external knowledge utilization. 
7. arXiv:2502.15543 [pdf, other] (cs.CL, cs.AI)
PIP-KAG: Mitigating Knowledge Conflicts in Knowledge-Augmented Generation via Parametric Pruning
Authors: Pengcheng Huang, Zhenghao Liu, Yukun Yan, Xiaoyuan Yi, Hao Chen, Zhiyuan Liu, Maosong Sun, Tong Xiao, Ge Yu, Chenyan Xiong
Abstract: Knowledge-Augmented Generation (KAG) has shown great promise in updating the internal memory of Large Language Models (LLMs) by integrating external knowledge. However, KAG inevitably faces knowledge conflicts when the internal memory contradicts external information. Current approaches to mitigating these conflicts mainly focus on improving external knowledge utilization. However, these methods have shown only limited effectiveness in mitigating the knowledge conflict problem, as internal knowledge continues to influence the generation process of LLMs. In this paper, we propose a ParametrIc Pruning-based Knowledge-Augmented Generation (PIP-KAG) approach, which prunes internal knowledge of LLMs and incorporates a plug-and-play adaptation module to help LLMs better leverage external sources. Additionally, we construct the CoConflictQA benchmark based on the hallucination of LLMs to better evaluate contextual faithfulness when answering questions. Experimental results on CoConflictQA demonstrate that PIP-KAG significantly reduces knowledge conflicts and improves context fidelity. Notably, PIP-KAG reduces the LLM's parameters by 13%, enhancing parameter efficiency in LLMs within the KAG framework. All codes are available at https://github.com/OpenBMB/PIP-KAG.
Submitted 21 February, 2025; originally announced February 2025.
Comments: 20 pages, 7 figures, 7 tables
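The abstract does not spell out the pruning criterion, so the following is only a generic magnitude-pruning sketch in PyTorch to make the "prune internal knowledge, reducing parameters by 13%" idea concrete; the paper's actual procedure may differ.

```python
import torch
import torch.nn as nn

@torch.no_grad()
def prune_output_neurons(linear: nn.Linear, fraction: float = 0.13) -> None:
    """Zero the `fraction` of output neurons with the smallest weight norm.
    Generic magnitude pruning, not the criterion used by PIP-KAG."""
    norms = linear.weight.norm(dim=1)        # one norm per output neuron
    k = int(fraction * norms.numel())
    idx = norms.argsort()[:k]                # indices of the smallest-norm neurons
    linear.weight[idx] = 0.0
    if linear.bias is not None:
        linear.bias[idx] = 0.0

layer = nn.Linear(4096, 11008)               # FFN-sized layer, for illustration
prune_output_neurons(layer)
```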
8. arXiv:2502.15475 [pdf, other] (cs.LG, cs.IT)
Decoding for Punctured Convolutional and Turbo Codes: A Deep Learning Solution for Protocols Compliance
Authors: Yongli Yan, Linglong Dai
Abstract: Neural network-based decoding methods have shown promise in enhancing error correction performance, but traditional approaches struggle with the challenges posed by punctured codes. In particular, these methods fail to address the complexities of variable code rates and the need for protocol compatibility. This paper presents a unified Long Short-Term Memory (LSTM)-based decoding architecture specifically designed to overcome these challenges. The proposed method unifies punctured convolutional and Turbo codes. A puncture embedding mechanism integrates puncturing patterns directly into the network, enabling seamless adaptation to varying code rates, while balanced bit error rate training ensures robustness across different code lengths, rates, and channels, maintaining protocol flexibility. Extensive simulations in Additive White Gaussian Noise and Rayleigh fading channels demonstrate that the proposed approach outperforms conventional decoding techniques, providing significant improvements in decoding accuracy and robustness. These results underscore the potential of LSTM-based decoding as a promising solution for next-generation artificial intelligence powered communication systems.
Submitted 21 February, 2025; originally announced February 2025.
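One plausible reading of the "puncture embedding mechanism" is to feed the decoder the puncturing pattern alongside the received log-likelihood ratios (LLRs), so the network knows which positions were never transmitted. The sketch below illustrates that idea with an assumed bidirectional LSTM; layer sizes and the input encoding are illustrative, not the authors' exact architecture.

```python
import torch
import torch.nn as nn

class PunctureAwareDecoder(nn.Module):
    """Bidirectional LSTM that sees each received LLR together with a 0/1 flag
    marking transmitted vs. punctured positions (illustrative sketch)."""
    def __init__(self, hidden: int = 64):
        super().__init__()
        self.lstm = nn.LSTM(input_size=2, hidden_size=hidden,
                            batch_first=True, bidirectional=True)
        self.head = nn.Linear(2 * hidden, 1)

    def forward(self, llr, mask):
        # zero-fill punctured slots but keep the pattern visible to the network
        x = torch.stack([llr * mask, mask], dim=-1)   # (batch, seq, 2)
        h, _ = self.lstm(x)
        return self.head(h).squeeze(-1)               # per-bit logits

dec = PunctureAwareDecoder()
logits = dec(torch.randn(8, 120), torch.randint(0, 2, (8, 120)).float())
```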
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.13447">arXiv:2502.13447</a> <span> [<a href="https://arxiv.org/pdf/2502.13447">pdf</a>, <a href="https://arxiv.org/ps/2502.13447">ps</a>, <a href="https://arxiv.org/format/2502.13447">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Enhancing Chest X-ray Classification through Knowledge Injection in Cross-Modality Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yan%2C+Y">Yang Yan</a>, <a href="/search/cs?searchtype=author&query=Yue%2C+B">Bingqing Yue</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Q">Qiaxuan Li</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+M">Man Huang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+J">Jingyu Chen</a>, <a href="/search/cs?searchtype=author&query=Lan%2C+Z">Zhenzhong Lan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.13447v1-abstract-short" style="display: inline;"> The integration of artificial intelligence in medical imaging has shown tremendous potential, yet the relationship between pre-trained knowledge and performance in cross-modality learning remains unclear. This study investigates how explicitly injecting medical knowledge into the learning process affects the performance of cross-modality classification, focusing on Chest X-ray (CXR) images. We int… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13447v1-abstract-full').style.display = 'inline'; document.getElementById('2502.13447v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.13447v1-abstract-full" style="display: none;"> The integration of artificial intelligence in medical imaging has shown tremendous potential, yet the relationship between pre-trained knowledge and performance in cross-modality learning remains unclear. This study investigates how explicitly injecting medical knowledge into the learning process affects the performance of cross-modality classification, focusing on Chest X-ray (CXR) images. We introduce a novel Set Theory-based knowledge injection framework that generates captions for CXR images with controllable knowledge granularity. Using this framework, we fine-tune CLIP model on captions with varying levels of medical information. We evaluate the model's performance through zero-shot classification on the CheXpert dataset, a benchmark for CXR classification. Our results demonstrate that injecting fine-grained medical knowledge substantially improves classification accuracy, achieving 72.5\% compared to 49.9\% when using human-generated captions. This highlights the crucial role of domain-specific knowledge in medical cross-modality learning. Furthermore, we explore the influence of knowledge density and the use of domain-specific Large Language Models (LLMs) for caption generation, finding that denser knowledge and specialized LLMs contribute to enhanced performance. 
This research advances medical image analysis by demonstrating the effectiveness of knowledge injection for improving automated CXR classification, paving the way for more accurate and reliable diagnostic tools. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13447v1-abstract-full').style.display = 'none'; document.getElementById('2502.13447v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ICASSP'25</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12974">arXiv:2502.12974</a> <span> [<a href="https://arxiv.org/pdf/2502.12974">pdf</a>, <a href="https://arxiv.org/format/2502.12974">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Learning More Effective Representations for Dense Retrieval through Deliberate Thinking Before Search </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ji%2C+Y">Yifan Ji</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Z">Zhipeng Xu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zhenghao Liu</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+Y">Yukun Yan</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+S">Shi Yu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yishan Li</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zhiyuan Liu</a>, <a href="/search/cs?searchtype=author&query=Gu%2C+Y">Yu Gu</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+G">Ge Yu</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+M">Maosong Sun</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.12974v1-abstract-short" style="display: inline;"> Recent dense retrievers usually thrive on the emergency capabilities of Large Language Models (LLMs), using them to encode queries and documents into an embedding space for retrieval. These LLM-based dense retrievers have shown promising performance across various retrieval scenarios. However, relying on a single embedding to represent documents proves less effective in capturing different perspec… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12974v1-abstract-full').style.display = 'inline'; document.getElementById('2502.12974v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.12974v1-abstract-full" style="display: none;"> Recent dense retrievers usually thrive on the emergency capabilities of Large Language Models (LLMs), using them to encode queries and documents into an embedding space for retrieval. These LLM-based dense retrievers have shown promising performance across various retrieval scenarios. 
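The zero-shot evaluation step described here is standard CLIP usage. A minimal sketch with Hugging Face `transformers` follows; the label prompts and file name are illustrative (the paper's knowledge-injected captions are far richer).

```python
from PIL import Image
from transformers import CLIPModel, CLIPProcessor

model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# illustrative label prompts, one per candidate class
texts = ["a chest x-ray showing pleural effusion",
         "a chest x-ray with no abnormal findings"]
image = Image.open("cxr_example.png")  # hypothetical local file

inputs = processor(text=texts, images=image, return_tensors="pt", padding=True)
probs = model(**inputs).logits_per_image.softmax(dim=-1)  # zero-shot class probabilities
```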
10. arXiv:2502.12974 [pdf, other] (cs.IR)
Learning More Effective Representations for Dense Retrieval through Deliberate Thinking Before Search
Authors: Yifan Ji, Zhipeng Xu, Zhenghao Liu, Yukun Yan, Shi Yu, Yishan Li, Zhiyuan Liu, Yu Gu, Ge Yu, Maosong Sun
Abstract: Recent dense retrievers usually thrive on the emergent capabilities of Large Language Models (LLMs), using them to encode queries and documents into an embedding space for retrieval. These LLM-based dense retrievers have shown promising performance across various retrieval scenarios. However, relying on a single embedding to represent documents proves less effective in capturing different perspectives of documents for matching. In this paper, we propose the Deliberate Thinking based Dense Retriever (DEBATER), which enhances these LLM-based retrievers by enabling them to learn more effective document representations through a step-by-step thinking process. DEBATER introduces the Chain-of-Deliberation mechanism to iteratively optimize document representations using a continuous chain of thought. To consolidate information from various thinking steps, DEBATER also incorporates the Self Distillation mechanism, which identifies the most informative thinking steps and integrates them into a unified text embedding. Experimental results show that DEBATER significantly outperforms existing methods across several retrieval benchmarks, demonstrating superior accuracy and robustness. All codes are available at https://github.com/OpenBMB/DEBATER.
Submitted 18 February, 2025; originally announced February 2025.
11. arXiv:2502.12520 [pdf, other] (cs.CV)
SAFEERASER: Enhancing Safety in Multimodal Large Language Models through Multimodal Machine Unlearning
Authors: Junkai Chen, Zhijie Deng, Kening Zheng, Yibo Yan, Shuliang Liu, PeiJun Wu, Peijie Jiang, Jia Liu, Xuming Hu
Abstract: As Multimodal Large Language Models (MLLMs) develop, their potential security issues have become increasingly prominent. Machine Unlearning (MU), as an effective strategy for forgetting specific knowledge in training data, has been widely used in privacy protection. However, MU for safety in MLLMs has yet to be fully explored. To address this issue, we propose SAFEERASER, a safety unlearning benchmark for MLLMs, consisting of 3,000 images and 28.8K VQA pairs. We comprehensively evaluate unlearning methods from two perspectives: forget quality and model utility. Our findings show that existing MU methods struggle to maintain model performance while implementing the forget operation and often suffer from over-forgetting. Hence, we introduce Prompt Decouple (PD) Loss to alleviate over-forgetting by decoupling prompts during the unlearning process. To quantitatively measure the over-forgetting mitigated by PD Loss, we propose a new metric called Safe Answer Refusal Rate (SARR). Experimental results demonstrate that combining PD Loss with existing unlearning methods can effectively prevent over-forgetting, achieving a decrease of 79.5% in the SARR metric of LLaVA-7B and LLaVA-13B while maintaining forget quality and model utility. Our code and dataset will be released upon acceptance. Warning: This paper contains examples of harmful language and images; reader discretion is recommended.
Submitted 17 February, 2025; originally announced February 2025.
arXiv:2502.12490 [pdf, other] cs.CL (https://arxiv.org/abs/2502.12490)
UniGenCoder: Merging Seq2Seq and Seq2Tree Paradigms for Unified Code Generation
Authors: Liangying Shao, Yanfu Yan, Denys Poshyvanyk, Jinsong Su
Abstract: Deep learning-based code generation has completely transformed the way developers write programs today. Existing approaches have focused either on the Sequence-to-Sequence paradigm, which generates target code as a sequence of tokens, or the Sequence-to-Tree paradigm, which outputs code as a sequence of actions. While these two paradigms are intuitively complementary, their combination has not been previously explored. By comparing the code generated under the two paradigms, we find that integrating them holds significant potential. In this paper, we propose UniGenCoder for code-related generation tasks, which consists of a shared encoder, a shared decoder with a minimal set of additional parameters to unify the two paradigms, and a selector that dynamically chooses the optimal paradigm for each instance. During model training, we first apply multi-task learning and distillation strategies to facilitate knowledge transfer between the two paradigms, and then leverage contrastive learning to train the selector. Experimental results on text-to-code and code-to-code generation tasks demonstrate the effectiveness of our proposed model. We release our code at https://github.com/DeepLearnXMU/UniGenCoder.
Submitted 24 February, 2025; v1 submitted 17 February, 2025; originally announced February 2025.
Comments: accepted to the 47th International Conference on Software Engineering (ICSE 2025), NIER track.
arXiv:2502.12022 [pdf, other] cs.CL, cs.AI (https://arxiv.org/abs/2502.12022)
Teaching LLMs According to Their Aptitude: Adaptive Reasoning for Mathematical Problem Solving
Authors: Xin Xu, Yan Xu, Tianhao Chen, Yuchen Yan, Chengwu Liu, Zaoyu Chen, Yufei Wang, Yichun Yin, Yasheng Wang, Lifeng Shang, Qun Liu
Abstract: Existing approaches to mathematical reasoning with large language models (LLMs) rely on Chain-of-Thought (CoT) for generalizability or Tool-Integrated Reasoning (TIR) for precise computation. While efforts have been made to combine these methods, they primarily rely on post-selection or predefined strategies, leaving an open question: whether LLMs can autonomously adapt their reasoning strategy based on their inherent capabilities. In this work, we propose TATA (Teaching LLMs According to Their Aptitude), an adaptive framework that enables LLMs to personalize their reasoning strategy spontaneously, aligning it with their intrinsic aptitude. TATA incorporates base-LLM-aware data selection during supervised fine-tuning (SFT) to tailor training data to the model's unique abilities. This approach equips LLMs to autonomously determine and apply the appropriate reasoning strategy at test time. We evaluate TATA through extensive experiments on six mathematical reasoning benchmarks, using both general-purpose and math-specialized LLMs. Empirical results demonstrate that TATA effectively combines the complementary strengths of CoT and TIR, achieving superior or comparable performance with improved inference efficiency compared to TIR alone. Further analysis underscores the critical role of aptitude-aware data selection in enabling LLMs to make effective and adaptive reasoning decisions and align reasoning strategies with model capabilities.
Submitted 17 February, 2025; originally announced February 2025.
Comments: 8 pages.
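The base-LLM-aware selection step can be pictured as a simple filter over the SFT pool. Below is a minimal sketch under stated assumptions: solve_rate is a hypothetical stand-in for probing the base model (e.g., pass@k) with each reasoning style, and the keep-the-better-style rule is a simplification of TATA's actual selection procedure.

```python
def select_sft_data(problems, solve_rate):
    """Keep, for each problem, the reasoning style the base model handles better."""
    selected = []
    for problem in problems:
        cot = solve_rate(problem, style="cot")  # e.g. pass@k with chain-of-thought
        tir = solve_rate(problem, style="tir")  # e.g. pass@k with tool-integrated reasoning
        selected.append((problem, "cot" if cot >= tir else "tir"))
    return selected

# Toy usage with a fake solve-rate table standing in for probing the base model.
rates = {("p1", "cot"): 0.8, ("p1", "tir"): 0.4,
         ("p2", "cot"): 0.1, ("p2", "tir"): 0.9}
print(select_sft_data(["p1", "p2"], lambda p, style: rates[(p, style)]))
# [('p1', 'cot'), ('p2', 'tir')]
```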
arXiv:2502.11946 [pdf, other] cs.CL, cs.AI, cs.HC, cs.SD, eess.AS (https://arxiv.org/abs/2502.11946)
Step-Audio: Unified Understanding and Generation in Intelligent Speech Interaction
Authors: Ailin Huang, Boyong Wu, Bruce Wang, Chao Yan, Chen Hu, Chengli Feng, Fei Tian, Feiyu Shen, Jingbei Li, Mingrui Chen, Peng Liu, Ruihang Miao, Wang You, Xi Chen, Xuerui Yang, Yechang Huang, Yuxiang Zhang, Zheng Gong, Zixin Zhang, Hongyu Zhou, Jianjian Sun, Brian Li, Chengting Feng, Changyi Wan, Hanpeng Hu, et al. (120 additional authors not shown)
Abstract: Real-time speech interaction, serving as a fundamental interface for human-machine collaboration, holds immense potential. However, current open-source models face limitations such as high costs in voice data collection, weakness in dynamic control, and limited intelligence. To address these challenges, this paper introduces Step-Audio, the first production-ready open-source solution. Key contributions include: 1) a 130B-parameter unified speech-text multi-modal model that achieves unified understanding and generation, with the Step-Audio-Chat version open-sourced; 2) a generative speech data engine that establishes an affordable voice cloning framework and produces the open-sourced lightweight Step-Audio-TTS-3B model through distillation; 3) an instruction-driven fine control system enabling dynamic adjustments across dialects, emotions, singing, and RAP; 4) an enhanced cognitive architecture augmented with tool calling and role-playing abilities to manage complex tasks effectively. Based on our new StepEval-Audio-360 evaluation benchmark, Step-Audio achieves state-of-the-art performance in human evaluations, especially in terms of instruction following. On open-source benchmarks such as LLaMA Question, it shows a 9.3% average performance improvement, demonstrating our commitment to advancing the development of open-source multi-modal language technologies. Our code and models are available at https://github.com/stepfun-ai/Step-Audio.
Submitted 18 February, 2025; v1 submitted 17 February, 2025; originally announced February 2025.
arXiv:2502.11916 [pdf, other] cs.CL, cs.AI (https://arxiv.org/abs/2502.11916)
EssayJudge: A Multi-Granular Benchmark for Assessing Automated Essay Scoring Capabilities of Multimodal Large Language Models
Authors: Jiamin Su, Yibo Yan, Fangteng Fu, Han Zhang, Jingheng Ye, Xiang Liu, Jiahao Huo, Huiyu Zhou, Xuming Hu
Abstract: Automated Essay Scoring (AES) plays a crucial role in educational assessment by providing scalable and consistent evaluations of writing tasks. However, traditional AES systems face three major challenges: (1) reliance on handcrafted features that limit generalizability, (2) difficulty in capturing fine-grained traits like coherence and argumentation, and (3) inability to handle multimodal contexts. In the era of Multimodal Large Language Models (MLLMs), we propose EssayJudge, the first multimodal benchmark to evaluate AES capabilities across lexical-, sentence-, and discourse-level traits. By leveraging MLLMs' strengths in trait-specific scoring and multimodal context understanding, EssayJudge aims to offer precise, context-rich evaluations without manual feature engineering, addressing longstanding AES limitations. Our experiments with 18 representative MLLMs reveal gaps in AES performance compared to human evaluation, particularly in discourse-level traits, highlighting the need for further advancements in MLLM-based AES research. Our dataset and code will be available upon acceptance.
Submitted 17 February, 2025; originally announced February 2025.
Comments: JS and YY are co-first authors. XH is the corresponding author.
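Trait-level evaluation of this kind reduces to a scoring loop over (essay, trait) pairs. The sketch below shows one plausible harness; the trait list is abbreviated and score_with_mllm is a hypothetical model call, so this illustrates the shape of the evaluation rather than the benchmark's actual protocol.

```python
# Illustrative trait hierarchy; the benchmark's actual trait definitions are in the paper.
TRAITS = {
    "lexical": ["word choice"],
    "sentence": ["grammar", "fluency"],
    "discourse": ["coherence", "argumentation"],
}

def evaluate(essays, human_scores, score_with_mllm):
    """Mean absolute error of model scores vs. human scores, per trait."""
    errors = {}
    for level, traits in TRAITS.items():
        for trait in traits:
            diffs = [abs(score_with_mllm(e, trait) - human_scores[(e, trait)])
                     for e in essays]
            errors[(level, trait)] = sum(diffs) / len(diffs)
    return errors

# Toy run with a constant model score of 3 against human scores of 4.
essays = ["essay-1"]
human = {("essay-1", t): 4 for ts in TRAITS.values() for t in ts}
print(evaluate(essays, human, lambda essay, trait: 3))
```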
arXiv:2502.11684 [pdf, other] cs.CL, cs.AI (https://arxiv.org/abs/2502.11684)
MathFimer: Enhancing Mathematical Reasoning by Expanding Reasoning Steps through Fill-in-the-Middle Task
Authors: Yuchen Yan, Yongliang Shen, Yang Liu, Jin Jiang, Xin Xu, Mengdi Zhang, Jian Shao, Yueting Zhuang
Abstract: Mathematical reasoning represents a critical frontier in advancing large language models (LLMs). While step-by-step approaches have emerged as the dominant paradigm for mathematical problem-solving in LLMs, the quality of reasoning steps in training data fundamentally constrains the performance of the models. Recent studies have demonstrated that more detailed intermediate steps can enhance model performance, yet existing methods for step expansion either require more powerful external models or incur substantial computational costs. In this paper, we introduce MathFimer, a novel framework for mathematical reasoning step expansion inspired by the "Fill-in-the-middle" task from code completion. By decomposing solution chains into prefix-suffix pairs and training models to reconstruct missing intermediate steps, we develop a specialized model, MathFimer-7B, on our carefully curated NuminaMath-FIM dataset. We then apply this model to enhance existing mathematical reasoning datasets by inserting detailed intermediate steps into their solution chains, creating MathFimer-expanded versions. Through comprehensive experiments on multiple mathematical reasoning datasets, including MathInstruct and MetaMathQA, we demonstrate that models trained on MathFimer-expanded data consistently outperform their counterparts trained on original data across various benchmarks such as GSM8K and MATH. Our approach offers a practical, scalable solution for enhancing mathematical reasoning capabilities in LLMs without relying on powerful external models or expensive inference procedures.
Submitted 17 February, 2025; originally announced February 2025.
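The fill-in-the-middle decomposition is easy to make concrete: every interior step of a solution chain becomes a reconstruction target given its prefix and suffix. A minimal sketch, with the exact MathFimer/NuminaMath-FIM formatting left as an assumption:

```python
def fim_pairs(steps):
    """Yield (prefix, middle, suffix) training triples from a solution chain."""
    for i in range(1, len(steps) - 1):
        yield (steps[:i], steps[i], steps[i + 1:])

solution = ["Let x be the unknown.", "Then 2x + 3 = 11.", "So 2x = 8.", "Hence x = 4."]
for prefix, middle, suffix in fim_pairs(solution):
    print(prefix, "->", middle, "<-", suffix)
```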
arXiv:2502.11456 [pdf, other] cs.CV, cs.AI (https://arxiv.org/abs/2502.11456)
DOI: 10.1016/j.media.2025.103461 (https://doi.org/10.1016/j.media.2025.103461)
Leveraging Labelled Data Knowledge: A Cooperative Rectification Learning Network for Semi-supervised 3D Medical Image Segmentation
Authors: Yanyan Wang, Kechen Song, Yuyuan Liu, Shuai Ma, Yunhui Yan, Gustavo Carneiro
Abstract: Semi-supervised 3D medical image segmentation aims to achieve accurate segmentation using few labelled data and numerous unlabelled data. The main challenge in the design of semi-supervised learning methods consists in the effective use of the unlabelled data for training. A promising solution consists of ensuring consistent predictions across different views of the data, where the efficacy of this strategy depends on the accuracy of the pseudo-labels generated by the model for this consistency learning strategy. In this paper, we introduce a new methodology to produce high-quality pseudo-labels for a consistency learning strategy to address semi-supervised 3D medical image segmentation. The methodology has three important contributions. The first contribution is the Cooperative Rectification Learning Network (CRLN) that learns multiple prototypes per class to be used as external knowledge priors to adaptively rectify pseudo-labels at the voxel level. The second contribution consists of the Dynamic Interaction Module (DIM) to facilitate pairwise and cross-class interactions between prototypes and multi-resolution image features, enabling the production of accurate voxel-level clues for pseudo-label rectification. The third contribution is the Cooperative Positive Supervision (CPS), which optimises uncertain representations to align with unassertive representations of their class distributions, improving the model's accuracy in classifying uncertain regions. Extensive experiments on three public 3D medical segmentation datasets demonstrate the effectiveness and superiority of our semi-supervised learning method.
Submitted 17 February, 2025; originally announced February 2025.
Comments: Medical Image Analysis.
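One way to picture voxel-level pseudo-label rectification with class prototypes is sketched below: voxel features are compared to prototypes by cosine similarity, and the result is blended with the network's own prediction. The single-prototype-per-class setup and the blending rule are simplifying assumptions (CRLN learns multiple prototypes per class and couples them through its Dynamic Interaction Module), so treat this as a schematic only.

```python
import numpy as np

def rectify(pseudo_probs, features, prototypes, alpha=0.5):
    """pseudo_probs: (V, C) network softmax; features: (V, D); prototypes: (C, D)."""
    f = features / np.linalg.norm(features, axis=1, keepdims=True)
    p = prototypes / np.linalg.norm(prototypes, axis=1, keepdims=True)
    sim = f @ p.T                                   # cosine similarity per voxel/class
    proto_probs = np.exp(sim) / np.exp(sim).sum(axis=1, keepdims=True)
    blended = alpha * pseudo_probs + (1 - alpha) * proto_probs
    return blended.argmax(axis=1)                   # rectified voxel-level pseudo-labels

V, C, D = 5, 3, 8
rng = np.random.default_rng(0)
print(rectify(rng.dirichlet(np.ones(C), V), rng.normal(size=(V, D)), rng.normal(size=(C, D))))
```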
arXiv:2502.11390 [pdf, other] cs.CV (https://arxiv.org/abs/2502.11390)
MARS: Mesh AutoRegressive Model for 3D Shape Detailization
Authors: Jingnan Gao, Weizhe Liu, Weixuan Sun, Senbo Wang, Xibin Song, Taizhang Shang, Shenzhou Chen, Hongdong Li, Xiaokang Yang, Yichao Yan, Pan Ji
Abstract: State-of-the-art methods for mesh detailization predominantly utilize Generative Adversarial Networks (GANs) to generate detailed meshes from coarse ones. These methods typically learn a specific style code for each category or similar categories without enforcing geometry supervision across different Levels of Detail (LODs). Consequently, such methods often fail to generalize across a broader range of categories and cannot ensure shape consistency throughout the detailization process. In this paper, we introduce MARS, a novel approach for 3D shape detailization. Our method capitalizes on a novel multi-LOD, multi-category mesh representation to learn shape-consistent mesh representations in latent space across different LODs. We further propose a mesh autoregressive model capable of generating such latent representations through next-LOD token prediction. This approach significantly enhances the realism of the generated shapes. Extensive experiments conducted on the challenging 3D Shape Detailization benchmark demonstrate that our proposed MARS model achieves state-of-the-art performance, surpassing existing methods in both qualitative and quantitative assessments. Notably, the model's capability to generate fine-grained details while preserving the overall shape integrity is particularly commendable.
Submitted 16 February, 2025; originally announced February 2025.
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11051v2-abstract-full').style.display = 'none'; document.getElementById('2502.11051v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.10891">arXiv:2502.10891</a> <span> [<a href="https://arxiv.org/pdf/2502.10891">pdf</a>, <a href="https://arxiv.org/format/2502.10891">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> AquaScope: Reliable Underwater Image Transmission on Mobile Devices </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tian%2C+B">Beitong Tian</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+L">Lingzhi Zhao</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+B">Bo Chen</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+M">Mingyuan Wu</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+H">Haozhen Zheng</a>, <a href="/search/cs?searchtype=author&query=Vasisht%2C+D">Deepak Vasisht</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+F+Y">Francis Y. Yan</a>, <a href="/search/cs?searchtype=author&query=Nahrstedt%2C+K">Klara Nahrstedt</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.10891v1-abstract-short" style="display: inline;"> Underwater communication is essential for both recreational and scientific activities, such as scuba diving. However, existing methods remain highly constrained by environmental challenges and often require specialized hardware, driving research into more accessible underwater communication solutions. While recent acoustic-based communication systems support text messaging on mobile devices, their… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10891v1-abstract-full').style.display = 'inline'; document.getElementById('2502.10891v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.10891v1-abstract-full" style="display: none;"> Underwater communication is essential for both recreational and scientific activities, such as scuba diving. However, existing methods remain highly constrained by environmental challenges and often require specialized hardware, driving research into more accessible underwater communication solutions. While recent acoustic-based communication systems support text messaging on mobile devices, their low data rates severely limit broader applications. We present AquaScope, the first acoustic communication system capable of underwater image transmission on commodity mobile devices. 
arXiv:2502.10891 [pdf, other] cs.NI (https://arxiv.org/abs/2502.10891)
AquaScope: Reliable Underwater Image Transmission on Mobile Devices
Authors: Beitong Tian, Lingzhi Zhao, Bo Chen, Mingyuan Wu, Haozhen Zheng, Deepak Vasisht, Francis Y. Yan, Klara Nahrstedt
Abstract: Underwater communication is essential for both recreational and scientific activities, such as scuba diving. However, existing methods remain highly constrained by environmental challenges and often require specialized hardware, driving research into more accessible underwater communication solutions. While recent acoustic-based communication systems support text messaging on mobile devices, their low data rates severely limit broader applications. We present AquaScope, the first acoustic communication system capable of underwater image transmission on commodity mobile devices. To address the key challenges of underwater environments -- limited bandwidth and high transmission errors -- AquaScope employs and enhances generative image compression to improve compression efficiency, and integrates it with reliability-enhancement techniques at the physical layer to strengthen error resilience. We implemented AquaScope on the Android platform and demonstrated its feasibility for underwater image transmission. Experimental results show that AquaScope enables reliable, low-latency image transmission while preserving perceptual image quality, across various bandwidth-constrained and error-prone underwater conditions.
Submitted 15 February, 2025; originally announced February 2025.
Comments: 15 pages, 26 figures.
arXiv:2502.10405 [pdf] cs.CY, stat.AP (https://arxiv.org/abs/2502.10405)
Crop Yield Time-Series Data Prediction Based on Multiple Hybrid Machine Learning Models
Authors: Yueru Yan, Yue Wang, Jialin Li, Jingwei Zhang, Xingye Mo
Abstract: Agriculture plays a crucial role in the global economy and social stability, and accurate crop yield prediction is essential for rational planting planning and decision-making. This study focuses on crop yield time-series data prediction. Using a dataset containing multiple crops, multiple regions, and data over many years, this research deeply explores the relationships between climatic factors (average rainfall, average temperature), agricultural inputs (pesticide usage), and crop yield. Multiple hybrid machine learning models, such as Linear Regression, Random Forest, Gradient Boost, XGBoost, KNN, Decision Tree, and Bagging Regressor, are adopted for yield prediction. After evaluation, the Random Forest and Bagging Regressor models are found to perform excellently in predicting crop yield, with high accuracy and low error. As agricultural data becomes increasingly rich and time-series prediction techniques continue to evolve, the results of this study contribute to advancing the practical application of crop yield prediction in agricultural production management. The integration of time-series analysis allows for more dynamic, data-driven decision-making, enhancing the accuracy and reliability of crop yield forecasts over time.
Submitted 21 January, 2025; originally announced February 2025.
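The modelling recipe described above translates directly into a scikit-learn comparison loop. The sketch below uses synthetic data standing in for (rainfall, temperature, pesticide) features; it illustrates the evaluation pattern, not the study's dataset or exact model configurations.

```python
import numpy as np
from sklearn.ensemble import RandomForestRegressor, BaggingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

rng = np.random.default_rng(0)
X = rng.normal(size=(500, 3))  # columns stand in for rainfall, temperature, pesticide use
y = 3 * X[:, 0] - 2 * X[:, 1] + X[:, 2] ** 2 + rng.normal(scale=0.3, size=500)
Xtr, Xte, ytr, yte = train_test_split(X, y, random_state=0)

for model in (LinearRegression(),
              RandomForestRegressor(random_state=0),
              BaggingRegressor(random_state=0)):
    model.fit(Xtr, ytr)
    print(type(model).__name__, mean_squared_error(yte, model.predict(Xte)))
```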
arXiv:2502.10124 [pdf, other] cs.HC (https://arxiv.org/abs/2502.10124)
Modeling the Impact of Visual Stimuli on Redirection Noticeability with Gaze Behavior in Virtual Reality
Authors: Zhipeng Li, Yishu Ji, Ruijia Chen, Tianqi Liu, Yuntao Wang, Yuanchun Shi, Yukang Yan
Abstract: While users could embody virtual avatars that mirror their physical movements in Virtual Reality, these avatars' motions can be redirected to enable novel interactions. Excessive redirection, however, could break the user's sense of embodiment due to perceptual conflicts between vision and proprioception. While prior work focused on avatar-related factors influencing the noticeability of redirection, we investigate how the visual stimuli in the surrounding virtual environment affect user behavior and, in turn, the noticeability of redirection. Given the wide variety of different types of visual stimuli and their tendency to elicit varying individual reactions, we propose to use users' gaze behavior as an indicator of their response to the stimuli and model the noticeability of redirection. We conducted two user studies to collect users' gaze behavior and noticeability, investigating the relationship between them and identifying the most effective gaze behavior features for predicting noticeability. Based on the data, we developed a regression model that takes users' gaze behavior as input and outputs the noticeability of redirection. We then conducted an evaluation study to test our model on unseen visual stimuli, achieving an MSE of 0.012. We further implemented an adaptive redirection technique and conducted a proof-of-concept study to evaluate its effectiveness with complex visual stimuli in two applications. The results indicated that participants experienced lower physical demand and a stronger sense of body ownership when using our adaptive technique, demonstrating the potential of our model to support real-world use cases.
Submitted 14 February, 2025; originally announced February 2025.
Comments: 18 pages, CHI'25.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">submitted to IFAC Joint Symposia on Mechatronics & Robotics</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09238">arXiv:2502.09238</a> <span> [<a href="https://arxiv.org/pdf/2502.09238">pdf</a>, <a href="https://arxiv.org/format/2502.09238">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> OpenBench: A New Benchmark and Baseline for Semantic Navigation in Smart Logistics </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+J">Junhui Wang</a>, <a href="/search/cs?searchtype=author&query=Huo%2C+D">Dongjie Huo</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Z">Zehui Xu</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+Y">Yongliang Shi</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+Y">Yimin Yan</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yuanxin Wang</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+C">Chao Gao</a>, <a href="/search/cs?searchtype=author&query=Qiao%2C+Y">Yan Qiao</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+G">Guyue Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09238v1-abstract-short" style="display: inline;"> The increasing demand for efficient last-mile delivery in smart logistics underscores the role of autonomous robots in enhancing operational efficiency and reducing costs. Traditional navigation methods, which depend on high-precision maps, are resource-intensive, while learning-based approaches often struggle with generalization in real-world scenarios. To address these challenges, this work prop… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09238v1-abstract-full').style.display = 'inline'; document.getElementById('2502.09238v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09238v1-abstract-full" style="display: none;"> The increasing demand for efficient last-mile delivery in smart logistics underscores the role of autonomous robots in enhancing operational efficiency and reducing costs. Traditional navigation methods, which depend on high-precision maps, are resource-intensive, while learning-based approaches often struggle with generalization in real-world scenarios. To address these challenges, this work proposes the Openstreetmap-enhanced oPen-air sEmantic Navigation (OPEN) system that combines foundation models with classic algorithms for scalable outdoor navigation. The system uses off-the-shelf OpenStreetMap (OSM) for flexible map representation, thereby eliminating the need for extensive pre-mapping efforts. It also employs Large Language Models (LLMs) to comprehend delivery instructions and Vision-Language Models (VLMs) for global localization, map updates, and house number recognition. 
arXiv:2502.09238 [pdf, other] cs.RO (https://arxiv.org/abs/2502.09238)
OpenBench: A New Benchmark and Baseline for Semantic Navigation in Smart Logistics
Authors: Junhui Wang, Dongjie Huo, Zehui Xu, Yongliang Shi, Yimin Yan, Yuanxin Wang, Chao Gao, Yan Qiao, Guyue Zhou
Abstract: The increasing demand for efficient last-mile delivery in smart logistics underscores the role of autonomous robots in enhancing operational efficiency and reducing costs. Traditional navigation methods, which depend on high-precision maps, are resource-intensive, while learning-based approaches often struggle with generalization in real-world scenarios. To address these challenges, this work proposes the Openstreetmap-enhanced oPen-air sEmantic Navigation (OPEN) system that combines foundation models with classic algorithms for scalable outdoor navigation. The system uses off-the-shelf OpenStreetMap (OSM) for flexible map representation, thereby eliminating the need for extensive pre-mapping efforts. It also employs Large Language Models (LLMs) to comprehend delivery instructions and Vision-Language Models (VLMs) for global localization, map updates, and house number recognition. To compensate for the limitations of existing benchmarks, which are inadequate for assessing last-mile delivery, this work introduces a new benchmark specifically designed for outdoor navigation in residential areas, reflecting the real-world challenges faced by autonomous delivery systems. Extensive experiments in simulated and real-world environments demonstrate the proposed system's efficacy in enhancing navigation efficiency and reliability. To facilitate further research, our code and benchmark are publicly available.
Submitted 13 February, 2025; originally announced February 2025.
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09122">arXiv:2502.09122</a> <span> [<a href="https://arxiv.org/pdf/2502.09122">pdf</a>, <a href="https://arxiv.org/format/2502.09122">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Improving Deep Regression with Tightness </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Shihao Zhang</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+Y">Yuguang Yan</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+A">Angela Yao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2502.09122v1-abstract-full"> For deep regression, preserving the ordinality of the targets with respect to the feature representation improves performance across various tasks. However, a theoretical explanation for the benefits of ordinality is still lacking. This work reveals that preserving ordinality reduces the conditional entropy $H(Z|Y)$ of representation $Z$ conditional on the target $Y$. We further find that typical regression losses do little to reduce $H(Z|Y)$, even though it is vital for generalization performance. With this motivation, we introduce an optimal transport-based regularizer to preserve the similarity relationships of targets in the feature space and thereby reduce $H(Z|Y)$. Additionally, we introduce a simple yet efficient strategy of duplicating the regressor targets, also with the aim of reducing $H(Z|Y)$. Experiments on three real-world regression tasks verify the effectiveness of our strategies to improve deep regression. Code: https://github.com/needylove/Regression_tightness. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICLR 2025, Code: https://github.com/needylove/Regression_tightness</span> </p>
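<p>The following Python sketch illustrates the underlying idea in its simplest form: penalize features whose pairwise distances disagree with the pairwise distances of their targets, which pulls representations of similar targets together and lowers $H(Z|Y)$. This is a hand-rolled approximation, not the authors' optimal transport formulation; see their repository for the real implementation.</p>
<pre>
import torch

def similarity_preserving_regularizer(z: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
    """Minimal sketch (not the paper's exact OT regularizer): encourage
    pairwise feature distances to track pairwise target distances."""
    dz = torch.cdist(z, z)                                   # (B, B) feature distances
    dy = torch.cdist(y.view(-1, 1).float(), y.view(-1, 1).float())
    dz = dz / (dz.mean() + 1e-8)                             # scale-normalize both
    dy = dy / (dy.mean() + 1e-8)
    return ((dz - dy) ** 2).mean()

# Toy usage inside a training step (lam is a loss-weight hyperparameter):
#   loss = mse_loss(pred, y) + lam * similarity_preserving_regularizer(features, y)
z = torch.randn(8, 32)            # toy batch of features
y = torch.rand(8)                 # toy regression targets
print(similarity_preserving_regularizer(z, y))
</pre>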
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.08691">arXiv:2502.08691</a> <span> [<a href="https://arxiv.org/pdf/2502.08691">pdf</a>, <a href="https://arxiv.org/format/2502.08691">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> AgentSociety: Large-Scale Simulation of LLM-Driven Generative Agents Advances Understanding of Human Behaviors and Society </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Piao%2C+J">Jinghua Piao</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+Y">Yuwei Yan</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jun Zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+N">Nian Li</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+J">Junbo Yan</a>, <a href="/search/cs?searchtype=author&query=Lan%2C+X">Xiaochong Lan</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+Z">Zhihong Lu</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Z">Zhiheng Zheng</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J+Y">Jing Yi Wang</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+D">Di Zhou</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+C">Chen Gao</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+F">Fengli Xu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+F">Fang Zhang</a>, <a href="/search/cs?searchtype=author&query=Rong%2C+K">Ke Rong</a>, <a href="/search/cs?searchtype=author&query=Su%2C+J">Jun Su</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yong Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2502.08691v1-abstract-full"> Understanding human behavior and society is a central focus in social sciences, with the rise of generative social science marking a significant paradigmatic shift. By leveraging bottom-up simulations, it replaces costly and logistically challenging traditional experiments with scalable, replicable, and systematic computational approaches for studying complex social dynamics. Recent advances in large language models (LLMs) have further transformed this research paradigm, enabling the creation of human-like generative social agents and realistic simulacra of society. In this paper, we propose AgentSociety, a large-scale social simulator that integrates LLM-driven agents, a realistic societal environment, and a powerful large-scale simulation engine. Based on the proposed simulator, we generate social lives for over 10k agents, simulating their 5 million interactions both among agents and between agents and their environment. Furthermore, we explore the potential of AgentSociety as a testbed for computational social experiments, focusing on four key social issues: polarization, the spread of inflammatory messages, the effects of universal basic income policies, and the impact of external shocks such as hurricanes. These four issues serve as valuable cases for assessing AgentSociety's support for typical research methods -- such as surveys, interviews, and interventions -- as well as for investigating the patterns, causes, and underlying mechanisms of social issues. The alignment between AgentSociety's outcomes and real-world experimental results not only demonstrates its ability to capture human behaviors and their underlying mechanisms, but also underscores its potential as an important platform for social scientists and policymakers. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li>
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05849">arXiv:2502.05849</a> <span> [<a href="https://arxiv.org/pdf/2502.05849">pdf</a>, <a href="https://arxiv.org/format/2502.05849">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Fact-or-Fair: A Checklist for Behavioral Testing of AI Models on Fairness-Related Queries </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huang%2C+J">Jen-tse Huang</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+Y">Yuhang Yan</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+L">Linqi Liu</a>, <a href="/search/cs?searchtype=author&query=Wan%2C+Y">Yixin Wan</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+W">Wenxuan Wang</a>, <a href="/search/cs?searchtype=author&query=Chang%2C+K">Kai-Wei Chang</a>, <a href="/search/cs?searchtype=author&query=Lyu%2C+M+R">Michael R. Lyu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2502.05849v1-abstract-full"> The generation of incorrect images, such as depictions of people of color in Nazi-era uniforms by Gemini, frustrated users and harmed Google's reputation, motivating us to investigate the relationship between accurately reflecting factuality and promoting diversity and equity. In this study, we focus on 19 real-world statistics collected from authoritative sources. Using these statistics, we develop a checklist comprising objective and subjective queries to analyze the behavior of large language models (LLMs) and text-to-image (T2I) models. Objective queries assess the models' ability to provide accurate world knowledge. In contrast, the design of subjective queries follows a key principle: statistical or experiential priors should not be overgeneralized to individuals, ensuring that models uphold diversity. These subjective queries are based on three common human cognitive errors that often result in social biases. We propose metrics to assess factuality and fairness, and formally prove the inherent trade-off between these two aspects. Results show that GPT-4o and DALL-E 3 perform notably well among six LLMs and four T2I models. Our code is publicly available at https://github.com/uclanlp/Fact-or-Fair. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages of main text; 7 pages of appendices</span> </p> </li>
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05849v1-abstract-full').style.display = 'none'; document.getElementById('2502.05849v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages of main text; 7 pages of appendices;</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05467">arXiv:2502.05467</a> <span> [<a href="https://arxiv.org/pdf/2502.05467">pdf</a>, <a href="https://arxiv.org/format/2502.05467">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Position: LLMs Can be Good Tutors in Foreign Language Education </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ye%2C+J">Jingheng Ye</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shen Wang</a>, <a href="/search/cs?searchtype=author&query=Zou%2C+D">Deqing Zou</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+Y">Yibo Yan</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+K">Kun Wang</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+H">Hai-Tao Zheng</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Z">Zenglin Xu</a>, <a href="/search/cs?searchtype=author&query=King%2C+I">Irwin King</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+P+S">Philip S. Yu</a>, <a href="/search/cs?searchtype=author&query=Wen%2C+Q">Qingsong Wen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05467v1-abstract-short" style="display: inline;"> While recent efforts have begun integrating large language models (LLMs) into foreign language education (FLE), they often rely on traditional approaches to learning tasks without fully embracing educational methodologies, thus lacking adaptability to language learning. To address this gap, we argue that LLMs have the potential to serve as effective tutors in FLE. Specifically, LLMs can play three… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05467v1-abstract-full').style.display = 'inline'; document.getElementById('2502.05467v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05467v1-abstract-full" style="display: none;"> While recent efforts have begun integrating large language models (LLMs) into foreign language education (FLE), they often rely on traditional approaches to learning tasks without fully embracing educational methodologies, thus lacking adaptability to language learning. To address this gap, we argue that LLMs have the potential to serve as effective tutors in FLE. 
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.04345">arXiv:2502.04345</a> <span> [<a href="https://arxiv.org/pdf/2502.04345">pdf</a>, <a href="https://arxiv.org/format/2502.04345">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> JingFang: A Traditional Chinese Medicine Large Language Model of Expert-Level Medical Diagnosis and Syndrome Differentiation-Based Treatment </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yan%2C+Y">Yehan Yan</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+T">Tianhao Ma</a>, <a href="/search/cs?searchtype=author&query=Li%2C+R">Ruotai Li</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+X">Xinhan Zheng</a>, <a href="/search/cs?searchtype=author&query=Shan%2C+G">Guodong Shan</a>, <a href="/search/cs?searchtype=author&query=Li%2C+C">Chisheng Li</a> </p>
<p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2502.04345v1-abstract-full"> Traditional Chinese medicine (TCM) plays a vital role in health protection and disease treatment, but its practical application requires extensive medical knowledge and clinical experience. Existing TCM Large Language Models (LLMs) exhibit critical limitations: incomplete medical consultation and diagnosis, and inaccurate syndrome differentiation-based treatment. To address these issues, this study establishes JingFang (JF), a novel TCM Large Language Model that demonstrates expert-level capability in medical diagnosis and syndrome differentiation-based treatment. We propose a Multi-agent Dynamic Collaborative Chain-of-Thought Mechanism (MDCCTM) for medical consultation, giving JF effective and accurate diagnostic ability. In addition, a Syndrome Agent and a Dual-Stage Retrieval Scheme (DSRS) are developed to significantly enhance JF's capacity for disease treatment based on syndrome differentiation. JingFang not only facilitates the application of LLMs but also promotes the effective practice of TCM in human health protection and disease treatment. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li>
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.02871">arXiv:2502.02871</a> <span> [<a href="https://arxiv.org/pdf/2502.02871">pdf</a>, <a href="https://arxiv.org/format/2502.02871">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Position: Multimodal Large Language Models Can Significantly Advance Scientific Reasoning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yan%2C+Y">Yibo Yan</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shen Wang</a>, <a href="/search/cs?searchtype=author&query=Huo%2C+J">Jiahao Huo</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+J">Jingheng Ye</a>, <a href="/search/cs?searchtype=author&query=Chu%2C+Z">Zhendong Chu</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+X">Xuming Hu</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+P+S">Philip S. Yu</a>, <a href="/search/cs?searchtype=author&query=Gomes%2C+C">Carla Gomes</a>, <a href="/search/cs?searchtype=author&query=Selman%2C+B">Bart Selman</a>, <a href="/search/cs?searchtype=author&query=Wen%2C+Q">Qingsong Wen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2502.02871v1-abstract-full"> Scientific reasoning, the process through which humans apply logic, evidence, and critical thinking to explore and interpret scientific phenomena, is essential in advancing knowledge reasoning across diverse fields. However, despite significant progress, current scientific reasoning models still struggle with generalization across domains and often fall short of multimodal perception. Multimodal Large Language Models (MLLMs), which integrate text, images, and other modalities, present an exciting opportunity to overcome these limitations and enhance scientific reasoning. Therefore, this position paper argues that MLLMs can significantly advance scientific reasoning across disciplines such as mathematics, physics, chemistry, and biology. First, we propose a four-stage research roadmap of scientific reasoning capabilities, and highlight the current state of MLLM applications in scientific reasoning, noting their ability to integrate and reason over diverse data types. Second, we summarize the key challenges that remain obstacles to achieving MLLM's full potential. To address these challenges, we propose actionable insights and suggestions for the future. Overall, our work offers a novel perspective on MLLM integration with scientific reasoning, providing the LLM community with a valuable vision for achieving Artificial General Intelligence (AGI). </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li>
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.02741">arXiv:2502.02741</a> <span> [<a href="https://arxiv.org/pdf/2502.02741">pdf</a>, <a href="https://arxiv.org/format/2502.02741">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> RFMedSAM 2: Automatic Prompt Refinement for Enhanced Volumetric Medical Image Segmentation with SAM 2 </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xie%2C+B">Bin Xie</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+H">Hao Tang</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+Y">Yan Yan</a>, <a href="/search/cs?searchtype=author&query=Agam%2C+G">Gady Agam</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2502.02741v1-abstract-full"> Segment Anything Model 2 (SAM 2), a prompt-driven foundation model extending SAM to both image and video domains, has shown superior zero-shot performance compared to its predecessor. Building on SAM's success in medical image segmentation, SAM 2 presents significant potential for further advancement. However, similar to SAM, SAM 2 is limited by its output of binary masks, inability to infer semantic labels, and dependence on precise prompts for the target object area. Additionally, direct application of SAM and SAM 2 to medical image segmentation tasks yields suboptimal results. In this paper, we explore the upper performance limit of SAM 2 using custom fine-tuning adapters, achieving a Dice Similarity Coefficient (DSC) of 92.30% on the BTCV dataset, surpassing the state-of-the-art nnUNet by 12%. Following this, we address the prompt dependency by investigating various prompt generators. We introduce a UNet to autonomously generate predicted masks and bounding boxes, which serve as input to SAM 2. Subsequent dual-stage refinements by SAM 2 further enhance performance. Extensive experiments show that our method achieves state-of-the-art results on the AMOS2022 dataset, with a Dice improvement of 2.9% compared to nnUNet, and outperforms nnUNet by 6.4% on the BTCV dataset. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p>
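<p>The prompt-refinement loop described here is easy to picture in code. Below is a minimal Python sketch of the UNet-prompts-SAM-2 pipeline; <code>unet_predict</code> and <code>sam2_refine</code> are hypothetical stand-ins (a real pipeline would call the actual UNet and SAM 2 predictor), so only the data flow is meaningful.</p>
<pre>
import numpy as np

def mask_to_box(mask: np.ndarray) -> tuple:
    """Bounding box (x0, y0, x1, y1) of a binary mask."""
    ys, xs = np.nonzero(mask)
    return (xs.min(), ys.min(), xs.max(), ys.max())

def unet_predict(image: np.ndarray) -> np.ndarray:
    """Stand-in for the auxiliary UNet; returns a dummy coarse mask."""
    mask = np.zeros(image.shape[:2], dtype=np.uint8)
    mask[8:24, 8:24] = 1  # illustrative only
    return mask

def sam2_refine(image: np.ndarray, box: tuple, mask: np.ndarray) -> np.ndarray:
    """Stand-in for a SAM 2 predictor call that takes box/mask prompts."""
    return mask  # identity placeholder

def segment(image: np.ndarray) -> np.ndarray:
    coarse = unet_predict(image)                               # autonomous prompt source
    first = sam2_refine(image, mask_to_box(coarse), coarse)    # refinement stage 1
    return sam2_refine(image, mask_to_box(first), first)       # refinement stage 2

print(segment(np.zeros((32, 32, 3))).sum())
</pre>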
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02741v1-abstract-full').style.display = 'none'; document.getElementById('2502.02741v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00848">arXiv:2502.00848</a> <span> [<a href="https://arxiv.org/pdf/2502.00848">pdf</a>, <a href="https://arxiv.org/format/2502.00848">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> RealRAG: Retrieval-augmented Realistic Image Generation via Self-reflective Contrastive Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lyu%2C+Y">Yuanhuiyi Lyu</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+X">Xu Zheng</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+L">Lutao Jiang</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+Y">Yibo Yan</a>, <a href="/search/cs?searchtype=author&query=Zou%2C+X">Xin Zou</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+H">Huiyu Zhou</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Linfeng Zhang</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+X">Xuming Hu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.00848v1-abstract-short" style="display: inline;"> Recent text-to-image generative models, e.g., Stable Diffusion V3 and Flux, have achieved notable progress. However, these models are strongly restricted to their limited knowledge, a.k.a., their own fixed parameters, that are trained with closed datasets. This leads to significant hallucinations or distortions when facing fine-grained and unseen novel real-world objects, e.g., the appearance of t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00848v1-abstract-full').style.display = 'inline'; document.getElementById('2502.00848v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.00848v1-abstract-full" style="display: none;"> Recent text-to-image generative models, e.g., Stable Diffusion V3 and Flux, have achieved notable progress. However, these models are strongly restricted to their limited knowledge, a.k.a., their own fixed parameters, that are trained with closed datasets. This leads to significant hallucinations or distortions when facing fine-grained and unseen novel real-world objects, e.g., the appearance of the Tesla Cybertruck. To this end, we present the first real-object-based retrieval-augmented generation framework (RealRAG), which augments fine-grained and unseen novel object generation by learning and retrieving real-world images to overcome the knowledge gaps of generative models. 
Specifically, to integrate missing memory for unseen novel object generation, we train a reflective retriever by self-reflective contrastive learning, which injects the generator's knowledge into the self-reflective negatives, ensuring that the retrieved augmented images compensate for the model's missing knowledge. Furthermore, the real-object-based framework integrates fine-grained visual knowledge for the generative models, tackling the distortion problem and improving the realism of fine-grained object generation. RealRAG is modular and can be applied to all types of state-of-the-art text-to-image generative models, delivering remarkable performance boosts with all of them, such as a 16.18% FID gain with the auto-regressive model on the Stanford Car benchmark. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00630">arXiv:2502.00630</a> <span> [<a href="https://arxiv.org/pdf/2502.00630">pdf</a>, <a href="https://arxiv.org/format/2502.00630">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Self-Prompt SAM: Medical Image Segmentation via Automatic Prompt SAM Adaptation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xie%2C+B">Bin Xie</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+H">Hao Tang</a>, <a href="/search/cs?searchtype=author&query=Cai%2C+D">Dawen Cai</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+Y">Yan Yan</a>, <a href="/search/cs?searchtype=author&query=Agam%2C+G">Gady Agam</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2502.00630v1-abstract-full"> Segment Anything Model (SAM) has demonstrated impressive zero-shot performance and brought a range of unexplored capabilities to natural image segmentation tasks.
However, as a very important branch of image segmentation, the performance of SAM remains uncertain when applied to medical image segmentation, owing to the significant differences between natural images and medical images. Meanwhile, it is difficult to meet SAM's requirement for extra prompts, such as points or boxes, to specify medical regions. In this paper, we propose a novel self-prompt SAM adaptation framework for medical image segmentation, named Self-Prompt-SAM. We design a multi-scale prompt generator combined with the image encoder in SAM to generate auxiliary masks. Then, we use the auxiliary masks to generate bounding boxes as box prompts and use the Distance Transform to select the most central points as point prompts. Meanwhile, we design a 3D depth-fused adapter (DFusedAdapter) and inject it into each transformer in the image encoder and mask decoder, enabling pre-trained 2D SAM models to extract 3D information and adapt to 3D medical images. Extensive experiments demonstrate that our method achieves state-of-the-art performance and outperforms nnUNet by 2.3% on AMOS2022, 1.6% on ACDC, and 0.5% on Synapse. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p>
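<p>The prompt-derivation step described here (a box from the auxiliary mask plus the most central point via the distance transform) is concrete enough to sketch directly. The following Python snippet follows that recipe using SciPy's Euclidean distance transform; it is an illustration of the stated idea, not the authors' released code.</p>
<pre>
import numpy as np
from scipy import ndimage

def prompts_from_mask(aux_mask: np.ndarray):
    """Derive SAM-style prompts from an auxiliary mask: a box from the
    mask extent and, via the Euclidean distance transform, the deepest
    interior point as the point prompt."""
    ys, xs = np.nonzero(aux_mask)
    box = (xs.min(), ys.min(), xs.max(), ys.max())
    dist = ndimage.distance_transform_edt(aux_mask)   # distance to background
    cy, cx = np.unravel_index(np.argmax(dist), dist.shape)
    return box, (cx, cy)

# Toy usage: a rectangular mask yields its box and center-most pixel.
mask = np.zeros((64, 64), dtype=np.uint8)
mask[20:40, 10:50] = 1
print(prompts_from_mask(mask))
</pre>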
inline;"> Accurate weather forecasts are important for disaster prevention, agricultural planning, and water resource management. Traditional numerical weather prediction (NWP) methods offer physically interpretable high-accuracy predictions but are computationally expensive and fail to fully leverage rapidly growing historical data. In recent years, deep learning methods have made significant progress in w… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00338v1-abstract-full').style.display = 'inline'; document.getElementById('2502.00338v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.00338v1-abstract-full" style="display: none;"> Accurate weather forecasts are important for disaster prevention, agricultural planning, and water resource management. Traditional numerical weather prediction (NWP) methods offer physically interpretable high-accuracy predictions but are computationally expensive and fail to fully leverage rapidly growing historical data. In recent years, deep learning methods have made significant progress in weather forecasting, but challenges remain, such as balancing global and regional high-resolution forecasts, excessive smoothing in extreme event predictions, and insufficient dynamic system modeling. To address these issues, this paper proposes a global-regional nested weather forecasting framework based on graph neural networks (GNNs). By combining a dynamic system perspective with multi-grid theory, we construct a multi-scale graph structure and densify the target region to capture local high-frequency features. We introduce an adaptive information propagation mechanism, using dynamic gating units to deeply integrate node and edge features for more accurate extreme event forecasting. For high-resolution regional forecasts, we propose a neural nested grid method to mitigate boundary information loss. Experimental results show that the proposed method performs excellently across global to regional scales and short-term to long-term forecasts, especially in extreme event predictions (e.g., typhoons), significantly improving forecast accuracy. Our codes are available at https://github.com/YuanGao-YG/OneForecast. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00338v1-abstract-full').style.display = 'none'; document.getElementById('2502.00338v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00334">arXiv:2502.00334</a> <span> [<a href="https://arxiv.org/pdf/2502.00334">pdf</a>, <a href="https://arxiv.org/format/2502.00334">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> UGPhysics: A Comprehensive Benchmark for Undergraduate Physics Reasoning with Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xu%2C+X">Xin Xu</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Q">Qiyun Xu</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+T">Tong Xiao</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+T">Tianhao Chen</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+Y">Yuchen Yan</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jiaxin Zhang</a>, <a href="/search/cs?searchtype=author&query=Diao%2C+S">Shizhe Diao</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+C">Can Yang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yang Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2502.00334v2-abstract-full"> Large language models (LLMs) have demonstrated remarkable capabilities in solving complex reasoning tasks, particularly in mathematics. However, the domain of physics reasoning presents unique challenges that have received significantly less attention. Existing benchmarks often fall short in evaluating LLMs' abilities on the breadth and depth of undergraduate-level physics, underscoring the need for a comprehensive evaluation. To fill this gap, we introduce UGPhysics, a large-scale and comprehensive benchmark specifically designed to evaluate UnderGraduate-level Physics (UGPhysics) reasoning with LLMs. UGPhysics includes 5,520 undergraduate-level physics problems in both English and Chinese, covering 13 subjects with seven different answer types and four distinct physics reasoning skills, all rigorously screened for data leakage. Additionally, we develop a Model-Assistant Rule-based Judgment (MARJ) pipeline specifically tailored for assessing answer correctness of physics problems, ensuring accurate evaluation.
Our evaluation of 31 leading LLMs shows that the highest overall accuracy is only 49.8% (achieved by OpenAI-o1-mini), underscoring the need for models with stronger physics reasoning skills beyond math abilities. We hope UGPhysics, along with MARJ, will drive future advancements in AI for physics reasoning. Codes and data are available at https://github.com/YangLabHKUST/UGPhysics . </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages</span> </p>
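<p>The MARJ pipeline itself is not spelled out in the abstract, so the following Python sketch only illustrates the general two-stage shape such a judge could take: cheap deterministic rules first, with a model assistant handling the undecided remainder. The matching rules and <code>llm_judge</code> callable are invented for illustration, not taken from the UGPhysics code.</p>
<pre>
import re

def rule_judge(pred: str, gold: str, tol: float = 1e-3):
    """Rule-based stage: exact string match, then numeric match within a
    relative tolerance; returns True/False, or None when rules cannot decide."""
    if pred.strip() == gold.strip():
        return True
    nums_p = re.findall(r"-?\d+\.?\d*", pred)
    nums_g = re.findall(r"-?\d+\.?\d*", gold)
    if nums_p and nums_g:
        diff = abs(float(nums_p[-1]) - float(nums_g[-1]))
        return diff <= tol * max(1.0, abs(float(nums_g[-1])))
    return None

def marj_style_judge(pred: str, gold: str, llm_judge) -> bool:
    """Hypothetical two-stage judgment in the spirit of MARJ: rules first,
    a model assistant only for answers the rules cannot score."""
    verdict = rule_judge(pred, gold)
    return verdict if verdict is not None else llm_judge(pred, gold)

print(marj_style_judge("v = 3.0 m/s", "3.000", lambda p, g: False))  # True via rules
</pre>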
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.18863">arXiv:2501.18863</a> <span> [<a href="https://arxiv.org/pdf/2501.18863">pdf</a>, <a href="https://arxiv.org/ps/2501.18863">ps</a>, <a href="https://arxiv.org/format/2501.18863">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Adaptivity and Convergence of Probability Flow ODEs in Diffusion Generative Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tang%2C+J">Jiaqi Tang</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+Y">Yuling Yan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2501.18863v1-abstract-full"> Score-based generative models, which transform noise into data by learning to reverse a diffusion process, have become a cornerstone of modern generative AI. This paper contributes to establishing theoretical guarantees for the probability flow ODE, a widely used diffusion-based sampler known for its practical efficiency. While a number of prior works address its general convergence theory, it remains unclear whether the probability flow ODE sampler can adapt to the low-dimensional structures commonly present in natural image data. We demonstrate that, with accurate score function estimation, the probability flow ODE sampler achieves a convergence rate of $O(k/T)$ in total variation distance (ignoring logarithmic factors), where $k$ is the intrinsic dimension of the target distribution and $T$ is the number of iterations. This dimension-free convergence rate improves upon existing results that scale with the typically much larger ambient dimension, highlighting the ability of the probability flow ODE sampler to exploit intrinsic low-dimensional structures in the target distribution for faster sampling. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p>
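<p class="mathjax">Restated schematically from the abstract (writing $p_T$ for the law of the sampler's output after $T$ iterations, an assumed symbol, and $p_{\mathrm{data}}$ for the target distribution): $$\mathsf{TV}\left(p_T,\, p_{\mathrm{data}}\right) \,=\, O\!\left(\frac{k}{T}\right) \quad \text{up to logarithmic factors},$$ where $k$ is the intrinsic dimension of the target rather than the ambient dimension; the bound therefore does not degrade as the ambient dimension grows, which is the sense in which the rate is dimension-free.</p>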
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.17555">arXiv:2501.17555</a> <span> [<a href="https://arxiv.org/pdf/2501.17555">pdf</a>, <a href="https://arxiv.org/format/2501.17555">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> An Exceptional Dataset For Rare Pancreatic Tumor Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+W">Wenqi Li</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yingli Chen</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+K">Keyang Zhou</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+X">Xiaoxiao Hu</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Z">Zilu Zheng</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+Y">Yue Yan</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xinpeng Zhang</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+W">Wei Tang</a>, <a href="/search/cs?searchtype=author&query=Qian%2C+Z">Zhenxing Qian</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2501.17555v1-abstract-full"> Pancreatic NEuroendocrine Tumors (pNETs) are very rare endocrine neoplasms that account for less than 5% of all pancreatic malignancies, with an incidence of only 1-1.5 cases per 100,000. Early detection of pNETs is critical for improving patient survival, but the rarity of pNETs makes segmenting them from CT a very challenging problem. So far, there has not been a dataset specifically for pNETs available to researchers. To address this issue, we propose a pNETs dataset, a well-annotated Contrast-Enhanced Computed Tomography (CECT) dataset focused exclusively on Pancreatic Neuroendocrine Tumors, containing data from 469 patients. This is the first dataset solely dedicated to pNETs, distinguishing it from previous collections. Additionally, we provide baseline detection networks with a new slice-wise weight loss function designed for the UNet-based model, improving the overall pNET segmentation performance. We hope that our dataset can enhance the understanding and diagnosis of pNETs within the medical community, facilitate the development of more accurate diagnostic tools, and ultimately improve patient outcomes and advance the field of oncology. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p>
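<p>The exact form of the slice-wise weight loss is not given in the abstract, so the following PyTorch sketch shows one plausible reading: a per-slice binary cross-entropy that up-weights slices containing tumor voxels. Treat the weighting scheme as an assumption, not the released baseline.</p>
<pre>
import torch

def slice_wise_weighted_loss(pred: torch.Tensor, target: torch.Tensor, eps: float = 1e-6) -> torch.Tensor:
    """Minimal sketch of a slice-wise weight loss for UNet-style training
    on CECT volumes (an assumed form, not the dataset's released baseline)."""
    # pred: probabilities, target: binary labels, both shaped (B, S, H, W).
    bce = -(target * torch.log(pred + eps)
            + (1 - target) * torch.log(1 - pred + eps)).mean(dim=(2, 3))
    tumor_slice = (target > 0.5).any(dim=3).any(dim=2).float()
    w = 1.0 + tumor_slice                  # weight 2 on tumor-bearing slices
    return (w * bce).sum() / w.sum()

# Toy usage: batch of 2 volumes, 4 slices of 8x8 each, tumor in one slice.
pred = torch.full((2, 4, 8, 8), 0.3)
target = torch.zeros(2, 4, 8, 8)
target[0, 1, 2:5, 2:5] = 1.0
print(slice_wise_weighted_loss(pred, target))
</pre>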
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.17547">arXiv:2501.17547</a> <span> [<a href="https://arxiv.org/pdf/2501.17547">pdf</a>, <a href="https://arxiv.org/format/2501.17547">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Towards Training-Free Open-World Classification with 3D Generative Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xia%2C+X">Xinzhe Xia</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+W">Weiguang Zhao</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+Y">Yuyao Yan</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+G">Guanyu Yang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Rui Zhang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+K">Kaizhu Huang</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+X">Xi Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2501.17547v1-abstract-full"> 3D open-world classification is a challenging yet essential task in dynamic and unstructured real-world scenarios, requiring both open-category and open-pose recognition. To address these challenges, recent approaches often adopt sophisticated 2D pre-trained models to provide enriched and stable representations. However, these methods largely rely on how 3D objects can be projected into 2D space, which is unfortunately not well solved, and thus significantly limits their performance. Unlike these present efforts, in this paper we make a pioneering exploration of 3D generative models for 3D open-world classification. Drawing on abundant prior knowledge from 3D generative models, we additionally craft a rotation-invariant feature extractor. This innovative synergy endows our pipeline with the advantages of being training-free, open-category, and pose-invariant, thus well suited to 3D open-world classification. Extensive experiments on benchmark datasets demonstrate the potential of generative models in 3D open-world classification, achieving state-of-the-art performance on ModelNet10 and McGill with 32.0% and 8.7% overall accuracy improvement, respectively. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li>
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.17547v1-abstract-full').style.display = 'none'; document.getElementById('2501.17547v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.15711">arXiv:2501.15711</a> <span> [<a href="https://arxiv.org/pdf/2501.15711">pdf</a>, <a href="https://arxiv.org/format/2501.15711">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3706598.3713496">10.1145/3706598.3713496 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> DanmuA11y: Making Time-Synced On-Screen Video Comments (Danmu) Accessible to Blind and Low Vision Users via Multi-Viewer Audio Discussions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xu%2C+S">Shuchang Xu</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+X">Xiaofu Jin</a>, <a href="/search/cs?searchtype=author&query=Qu%2C+H">Huamin Qu</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+Y">Yukang Yan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.15711v1-abstract-short" style="display: inline;"> By overlaying time-synced user comments on videos, Danmu creates a co-watching experience for online viewers. However, its visual-centric design poses significant challenges for blind and low vision (BLV) viewers. Our formative study identified three primary challenges that hinder BLV viewers' engagement with Danmu: the lack of visual context, the speech interference between comments and videos, a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15711v1-abstract-full').style.display = 'inline'; document.getElementById('2501.15711v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.15711v1-abstract-full" style="display: none;"> By overlaying time-synced user comments on videos, Danmu creates a co-watching experience for online viewers. However, its visual-centric design poses significant challenges for blind and low vision (BLV) viewers. Our formative study identified three primary challenges that hinder BLV viewers' engagement with Danmu: the lack of visual context, the speech interference between comments and videos, and the disorganization of comments. To address these challenges, we present DanmuA11y, a system that makes Danmu accessible by transforming it into multi-viewer audio discussions. DanmuA11y incorporates three core features: (1) Augmenting Danmu with visual context, (2) Seamlessly integrating Danmu into videos, and (3) Presenting Danmu via multi-viewer discussions. 
Evaluation with twelve BLV viewers demonstrated that DanmuA11y significantly improved Danmu comprehension, provided smooth viewing experiences, and fostered social connections among viewers. We further highlight implications for enhancing commentary accessibility in video-based social media and live-streaming platforms. </span> </p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li>
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.15408">arXiv:2501.15408</a> <span> [<a href="https://arxiv.org/pdf/2501.15408">pdf</a>, <a href="https://arxiv.org/format/2501.15408">other</a>] </span> </p>
<div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div>
<p class="title is-5 mathjax"> Memory Reviver: Supporting Photo-Collection Reminiscence for People with Visual Impairment via a Proactive Chatbot </p>
<p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xu%2C+S">Shuchang Xu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+C">Chang Chen</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zichen Liu</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+X">Xiaofu Jin</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+L">Linping Yuan</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+Y">Yukang Yan</a>, <a href="/search/cs?searchtype=author&query=Qu%2C+H">Huamin Qu</a> </p>
<p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Reminiscing with photo collections offers significant psychological benefits but poses challenges for people with visual impairment (PVI). Their current reliance on sighted help restricts the flexibility of this activity. In response, we explored using a chatbot in a preliminary study. We identified two primary challenges that hinder effective reminiscence with a chatbot: the scattering of information and a lack of proactive guidance. To address these limitations, we present Memory Reviver, a proactive chatbot that helps PVI reminisce with a photo collection through natural language communication. Memory Reviver incorporates two novel features: (1) a Memory Tree, which uses a hierarchical structure to organize the information in a photo collection; and (2) a Proactive Strategy, which actively delivers information to users at appropriate conversation rounds. Evaluation with twelve PVI demonstrated that Memory Reviver effectively facilitated engaging reminiscence, enhanced understanding of photo collections, and delivered natural conversational experiences. Based on our findings, we distill implications for supporting photo reminiscence and designing chatbots for PVI. </span> </p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li>
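<p class="is-size-7">The Memory Tree is described only as a hierarchical organization of a photo collection; the node fields in this sketch (events, photos, facts) are illustrative assumptions, not the paper's actual schema.</p>
<pre><code class="language-python">
# Hypothetical sketch of a "Memory Tree"-style hierarchy. The abstract
# only says it organizes photo-collection information hierarchically;
# everything else below is an illustrative assumption.
from dataclasses import dataclass, field

@dataclass
class MemoryNode:
    label: str                                   # e.g. "Beach trip" or "photo_007"
    facts: list = field(default_factory=list)    # descriptions usable in dialogue
    children: list = field(default_factory=list)

    def add_child(self, node: "MemoryNode") -> "MemoryNode":
        self.children.append(node)
        return node

    def collect_facts(self):
        """Depth-first gather, so a chatbot can pull scattered details
        about one event together instead of answering photo by photo."""
        gathered = list(self.facts)
        for child in self.children:
            gathered.extend(child.collect_facts())
        return gathered

root = MemoryNode("collection")
trip = root.add_child(MemoryNode("Beach trip", facts=["Summer 2021, with grandma"]))
trip.add_child(MemoryNode("photo_007", facts=["Two people building a sandcastle"]))
print(trip.collect_facts())
</code></pre>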
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.15214">arXiv:2501.15214</a> <span> [<a href="https://arxiv.org/pdf/2501.15214">pdf</a>, <a href="https://arxiv.org/format/2501.15214">other</a>] </span> </p>
<div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div>
<p class="title is-5 mathjax"> Zero-shot Robotic Manipulation with Language-guided Instruction and Formal Task Planning </p>
<p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tang%2C+J">Junfeng Tang</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+Z">Zihan Ye</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+Y">Yuping Yan</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Z">Ziqi Zheng</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+T">Ting Gao</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+Y">Yaochu Jin</a> </p>
<p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Robotic manipulation is often challenging due to long-horizon tasks and complex object relationships. A common solution is to develop a task and motion planning framework that integrates planning for high-level tasks and low-level motions. Recently, inspired by the powerful reasoning ability of Large Language Models (LLMs), LLM-based planning approaches have achieved remarkable progress. However, these methods still rely heavily on expert-specific knowledge and often generate invalid plans for unseen or unfamiliar tasks. To address this issue, we propose an innovative language-guided symbolic task planning framework with optimization (LM-SymOpt). To our knowledge, it is the first expert-free planning framework: we combine the world knowledge from LLMs with formal reasoning, resulting in improved generalization to new tasks. Specifically, unlike most existing work, LM-SymOpt employs LLMs to translate natural language instructions into symbolic representations, thereby representing actions as high-level symbols and reducing the search space for planning. Next, after evaluating the probability of each action completing the task using LLMs, a weighted random sampling method is introduced to generate candidate plans. Their feasibility is assessed through symbolic reasoning, and their cost efficiency is then evaluated using trajectory optimization to select the optimal plan. Our experimental results show that LM-SymOpt outperforms existing LLM-based planning approaches. </span> </p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li>
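<p class="is-size-7">The LM-SymOpt pipeline as summarized above -- LLM-scored actions, weighted random sampling of candidate plans, a symbolic feasibility gate, then cost-based selection -- can be sketched as follows; llm_action_prob, is_feasible, and trajectory_cost are stand-in stubs, not the paper's implementations.</p>
<pre><code class="language-python">
# Illustrative sketch of the plan-sampling loop described in the
# abstract; the three callables are stubs for the LLM scorer, the
# symbolic reasoner, and the trajectory optimizer respectively.
import random

def sample_plans(candidate_actions, llm_action_prob, plan_len, n_plans):
    weights = [llm_action_prob(a) for a in candidate_actions]
    return [random.choices(candidate_actions, weights=weights, k=plan_len)
            for _ in range(n_plans)]

def select_plan(plans, is_feasible, trajectory_cost):
    feasible = [p for p in plans if is_feasible(p)]   # symbolic reasoning gate
    if not feasible:
        return None
    return min(feasible, key=trajectory_cost)         # cheapest trajectory wins

actions = ["pick(block)", "place(block, table)", "push(block)"]
plans = sample_plans(actions, llm_action_prob=lambda a: 1.0, plan_len=3, n_plans=20)
best = select_plan(plans, is_feasible=lambda p: True,
                   trajectory_cost=lambda p: len(set(p)))
print(best)
</code></pre>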
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.14539">arXiv:2501.14539</a> <span> [<a href="https://arxiv.org/pdf/2501.14539">pdf</a>, <a href="https://arxiv.org/format/2501.14539">other</a>] </span> </p>
<div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div>
<p class="title is-5 mathjax"> A Recurrent Spiking Network with Hierarchical Intrinsic Excitability Modulation for Schema Learning </p>
<p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yu%2C+Y">Yingchao Yu</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+Y">Yaochu Jin</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+Y">Yuchen Xiao</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+Y">Yuping Yan</a> </p>
<p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Schema, a form of structured knowledge that promotes transfer learning, is attracting growing attention in both neuroscience and artificial intelligence (AI). Current schema research in neural computation is largely constrained to a single behavioral paradigm and relies heavily on recurrent neural networks (RNNs), which lack neural plausibility and biological interpretability. To address these limitations, this work first constructs a generalized behavioral paradigm framework for schema learning and introduces three novel cognitive tasks, thus supporting comprehensive schema exploration. Second, we propose a new model using recurrent spiking neural networks with hierarchical intrinsic excitability modulation (HM-RSNNs). The top level of the model selects excitability properties for task-specific demands, while the bottom level fine-tunes these properties for intra-task problems. Finally, extensive visualization analyses of HM-RSNNs are conducted to showcase their computational advantages, track the intrinsic excitability evolution during schema learning, and examine neural coordination differences across tasks. Biologically inspired lesion studies further uncover task-specific distributions of intrinsic excitability within schemas. Experimental results show that HM-RSNNs significantly outperform RSNN baselines across all tasks and exceed RNNs in the three novel cognitive tasks. Additionally, HM-RSNNs offer deeper insights into the neural dynamics underlying schema learning. </span> </p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p>
<p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">31 pages, 9 figures</span> </p>
<p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.2.6 </p> </li>
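<p class="is-size-7">What "intrinsic excitability modulation" can mean in a spiking network is easiest to see on a toy leaky integrate-and-fire step whose firing threshold is a per-neuron knob set by a higher level; HM-RSNNs' actual parameterization is not given in the abstract, so everything below is illustrative.</p>
<pre><code class="language-python">
# Toy leaky integrate-and-fire step with a modulated firing threshold,
# to illustrate intrinsic excitability modulation in spirit only; all
# constants and shapes here are arbitrary assumptions.
import numpy as np

def lif_step(v, spikes_in, w, threshold, decay=0.9):
    """v: membrane potentials; threshold is the excitability knob a
    top-level controller could set per task (lower = more excitable)."""
    v = decay * v + w @ spikes_in            # leak plus synaptic input
    fired = (v >= threshold).astype(float)   # spike where threshold crossed
    v = v * (1.0 - fired)                    # reset neurons that fired
    return v, fired

rng = np.random.default_rng(0)
w = rng.normal(scale=0.5, size=(4, 4))
v = np.zeros(4)
task_threshold = np.full(4, 0.8)             # a task-specific setting
for _ in range(10):
    v, out = lif_step(v, rng.integers(0, 2, size=4).astype(float), w, task_threshold)
print(out)
</code></pre>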
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">31 pages, 9 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.2.6 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.13629">arXiv:2501.13629</a> <span> [<a href="https://arxiv.org/pdf/2501.13629">pdf</a>, <a href="https://arxiv.org/format/2501.13629">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Sigma: Differential Rescaling of Query, Key and Value for Efficient Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lin%2C+Z">Zhenghao Lin</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+Z">Zihao Tang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xiao Liu</a>, <a href="/search/cs?searchtype=author&query=Gong%2C+Y">Yeyun Gong</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+Y">Yi Cheng</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Q">Qi Chen</a>, <a href="/search/cs?searchtype=author&query=Li%2C+H">Hang Li</a>, <a href="/search/cs?searchtype=author&query=Xin%2C+Y">Ying Xin</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Z">Ziyue Yang</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+K">Kailai Yang</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+Y">Yu Yan</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+X">Xiao Liang</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+S">Shuai Lu</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yiming Huang</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+Z">Zheheng Luo</a>, <a href="/search/cs?searchtype=author&query=Qu%2C+L">Lei Qu</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+X">Xuan Feng</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yaoxiang Wang</a>, <a href="/search/cs?searchtype=author&query=Xia%2C+Y">Yuqing Xia</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+F">Feiyang Chen</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Y">Yuting Jiang</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+Y">Yasen Hu</a>, <a href="/search/cs?searchtype=author&query=Ni%2C+H">Hao Ni</a>, <a href="/search/cs?searchtype=author&query=Li%2C+B">Binyang Li</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+G">Guoshuai Zhao</a> , et al. (9 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.13629v2-abstract-short" style="display: inline;"> We introduce Sigma, an efficient large language model specialized for the system domain, empowered by a novel architecture including DiffQKV attention, and pre-trained on our meticulously collected system domain data. 
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li>
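<p class="is-size-7">A shape-level sketch (PyTorch) of attention with different numbers of K and V heads plus a widened Q projection, in the spirit of the DiffQKV idea described above; the head counts and dimensions here are arbitrary assumptions, not Sigma's configuration.</p>
<pre><code class="language-python">
# Shape-level illustration only: K gets the fewest heads (most
# compressible per the abstract's sensitivity finding), V a middle
# number, and Q many heads, with K/V shared across Q groups as in GQA.
import torch

def diffqkv_attention(x, wq, wk, wv, n_q=8, n_k=2, n_v=4, d_head=64):
    b, t, _ = x.shape
    q = (x @ wq).view(b, t, n_q, d_head).transpose(1, 2)   # many Q heads
    k = (x @ wk).view(b, t, n_k, d_head).transpose(1, 2)   # few K heads
    v = (x @ wv).view(b, t, n_v, d_head).transpose(1, 2)   # mid-size V cache
    k = k.repeat_interleave(n_q // n_k, dim=1)             # share K across Q groups
    v = v.repeat_interleave(n_q // n_v, dim=1)             # share V across Q groups
    att = torch.softmax(q @ k.transpose(-2, -1) / d_head**0.5, dim=-1)
    return (att @ v).transpose(1, 2).reshape(b, t, n_q * d_head)

x = torch.randn(1, 16, 512)
wq = torch.randn(512, 8 * 64)
wk = torch.randn(512, 2 * 64)
wv = torch.randn(512, 4 * 64)
print(diffqkv_attention(x, wq, wk, wv).shape)   # torch.Size([1, 16, 512])
</code></pre>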
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.13258">arXiv:2501.13258</a> <span> [<a href="https://arxiv.org/pdf/2501.13258">pdf</a>, <a href="https://arxiv.org/format/2501.13258">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3706598.3713293">10.1145/3706598.3713293 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> From Following to Understanding: Investigating the Role of Reflective Prompts in AR-Guided Tasks to Promote Task Understanding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+N">Nandi Zhang</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+Y">Yukang Yan</a>, <a href="/search/cs?searchtype=author&query=Suzuki%2C+R">Ryo Suzuki</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.13258v1-abstract-short" style="display: inline;"> Augmented Reality (AR) is a promising medium for guiding users through tasks, yet its impact on fostering deeper task understanding remains underexplored. This paper investigates the impact of reflective prompts -- strategic questions that encourage users to challenge assumptions, connect actions to outcomes, and consider hypothetical scenarios -- on task comprehension and performance. We conducte… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.13258v1-abstract-full').style.display = 'inline'; document.getElementById('2501.13258v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.13258v1-abstract-full" style="display: none;"> Augmented Reality (AR) is a promising medium for guiding users through tasks, yet its impact on fostering deeper task understanding remains underexplored. This paper investigates the impact of reflective prompts -- strategic questions that encourage users to challenge assumptions, connect actions to outcomes, and consider hypothetical scenarios -- on task comprehension and performance. We conducted a two-phase study: a formative survey and co-design sessions (N=9) to develop reflective prompts, followed by a within-subject evaluation (N=16) comparing AR instructions with and without these prompts in coffee-making and circuit assembly tasks. Our results show that reflective prompts significantly improved objective task understanding and resulted in more proactive information acquisition behaviors during task completion. These findings highlight the potential of incorporating reflective elements into AR instructions to foster deeper engagement and learning. Based on data from both studies, we synthesized design guidelines for integrating reflective elements into AR systems to enhance user understanding without compromising task performance. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.13258v1-abstract-full').style.display = 'none'; document.getElementById('2501.13258v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">CHI 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.12948">arXiv:2501.12948</a> <span> [<a href="https://arxiv.org/pdf/2501.12948">pdf</a>, <a href="https://arxiv.org/format/2501.12948">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=DeepSeek-AI"> DeepSeek-AI</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+D">Daya Guo</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+D">Dejian Yang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Haowei Zhang</a>, <a href="/search/cs?searchtype=author&query=Song%2C+J">Junxiao Song</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Ruoyu Zhang</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+R">Runxin Xu</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Q">Qihao Zhu</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+S">Shirong Ma</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+P">Peiyi Wang</a>, <a href="/search/cs?searchtype=author&query=Bi%2C+X">Xiao Bi</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xiaokang Zhang</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+X">Xingkai Yu</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yu Wu</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z+F">Z. F. Wu</a>, <a href="/search/cs?searchtype=author&query=Gou%2C+Z">Zhibin Gou</a>, <a href="/search/cs?searchtype=author&query=Shao%2C+Z">Zhihong Shao</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zhuoshu Li</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+Z">Ziyi Gao</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+A">Aixin Liu</a>, <a href="/search/cs?searchtype=author&query=Xue%2C+B">Bing Xue</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+B">Bingxuan Wang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+B">Bochao Wu</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+B">Bei Feng</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+C">Chengda Lu</a> , et al. 
(175 additional authors not shown) </p>
<p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> We introduce our first-generation reasoning models, DeepSeek-R1-Zero and DeepSeek-R1. DeepSeek-R1-Zero, a model trained via large-scale reinforcement learning (RL) without supervised fine-tuning (SFT) as a preliminary step, demonstrates remarkable reasoning capabilities. Through RL, DeepSeek-R1-Zero naturally emerges with numerous powerful and intriguing reasoning behaviors. However, it encounters challenges such as poor readability and language mixing. To address these issues and further enhance reasoning performance, we introduce DeepSeek-R1, which incorporates multi-stage training and cold-start data before RL. DeepSeek-R1 achieves performance comparable to OpenAI-o1-1217 on reasoning tasks. To support the research community, we open-source DeepSeek-R1-Zero, DeepSeek-R1, and six dense models (1.5B, 7B, 8B, 14B, 32B, 70B) distilled from DeepSeek-R1 based on Qwen and Llama. </span> </p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li>
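<p class="is-size-7">The abstract says R1-Zero is trained with large-scale RL and no SFT, but gives no reward details. The sketch below shows a generic outcome-plus-format reward of the kind commonly used for RL on verifiable reasoning tasks; the markers, weights, and answer format are all assumptions, not DeepSeek's recipe.</p>
<pre><code class="language-python">
# Illustrative only: a rule-based reward for verifiable reasoning
# tasks. Every detail (the [think] markers, the "answer:" format,
# the 0.1 bonus) is a hypothetical stand-in.
import re

def outcome_reward(completion: str, gold_answer: str) -> float:
    # Small bonus for emitting an explicit reasoning section
    # (marker choice is hypothetical).
    has_reasoning = "[think]" in completion and "[/think]" in completion
    # Full credit only for a verifiably correct final answer.
    m = re.search(r"answer:\s*(.+)$", completion.strip(), flags=re.I)
    correct = bool(m) and m.group(1).strip() == gold_answer.strip()
    return float(correct) + 0.1 * has_reasoning

print(outcome_reward("[think]2 plus 2 is 4[/think] answer: 4", "4"))  # 1.1
</code></pre>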
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.11149">arXiv:2501.11149</a> <span> [<a href="https://arxiv.org/pdf/2501.11149">pdf</a>, <a href="https://arxiv.org/format/2501.11149">other</a>] </span> </p>
<div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div>
<p class="title is-5 mathjax"> CART-MPC: Coordinating Assistive Devices for Robot-Assisted Transferring with Multi-Agent Model Predictive Control </p>
<p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ye%2C+R">Ruolin Ye</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+S">Shuaixing Chen</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+Y">Yunting Yan</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+J">Joyce Yang</a>, <a href="/search/cs?searchtype=author&query=Ge%2C+C">Christina Ge</a>, <a href="/search/cs?searchtype=author&query=Barreiros%2C+J">Jose Barreiros</a>, <a href="/search/cs?searchtype=author&query=Tsui%2C+K">Kate Tsui</a>, <a href="/search/cs?searchtype=author&query=Silver%2C+T">Tom Silver</a>, <a href="/search/cs?searchtype=author&query=Bhattacharjee%2C+T">Tapomayukh Bhattacharjee</a> </p>
<p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Bed-to-wheelchair transferring is a ubiquitous activity of daily living (ADL), but especially challenging for caregiving robots with limited payloads. We develop a novel algorithm that leverages the presence of other assistive devices: a Hoyer sling and a wheelchair for coarse manipulation of heavy loads, alongside a robot arm for fine-grained manipulation of deformable objects (Hoyer sling straps). We instrument the Hoyer sling and wheelchair with actuators and sensors so that they can become intelligent agents in the algorithm. We then focus on one subtask of the transferring ADL -- tying Hoyer sling straps to the sling bar -- that exemplifies the challenges of transfer: multi-agent planning, deformable object manipulation, and generalization to varying hook shapes, sling materials, and care recipient bodies. To address these challenges, we propose CART-MPC, a novel algorithm based on turn-taking multi-agent model predictive control that uses a learned neural dynamics model for a keypoint-based representation of the deformable Hoyer sling strap, and a novel cost function that leverages linking numbers from knot theory and neural amortization to accelerate inference. We validate it in both RCareWorld simulation and real-world environments. In simulation, CART-MPC successfully generalizes across diverse hook designs, sling materials, and care recipient body shapes. In the real world, we show zero-shot sim-to-real generalization capabilities to tie deformable Hoyer sling straps on a sling bar towards transferring a manikin from a hospital bed to a wheelchair. See our website for supplementary materials: https://emprise.cs.cornell.edu/cart-mpc/. </span> </p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li>
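<p class="is-size-7">The turn-taking multi-agent MPC loop named in the abstract can be skeletonized as below; dynamics and cost are stand-in stubs where the paper would use its learned keypoint dynamics model and its knot-theoretic linking-number cost, neither reproduced here.</p>
<pre><code class="language-python">
# Skeleton of a turn-taking, sampling-based MPC loop; purely
# illustrative, with toy dynamics and cost.
import numpy as np

def turn_taking_mpc(state, agents, horizon, n_samples, dynamics, cost, rng):
    """Each agent (robot arm, Hoyer sling, wheelchair) plans in turn
    while the others' plans are held fixed -- the turn-taking scheme
    named in the abstract."""
    for agent in agents:
        best_u, best_c = None, np.inf
        for _ in range(n_samples):
            u_seq = rng.normal(size=(horizon, 2))   # sampled control sequence
            s, c = state, 0.0
            for u in u_seq:
                s = dynamics(s, agent, u)           # learned model would go here
                c += cost(s)
            if best_c > c:
                best_u, best_c = u_seq[0], c
        state = dynamics(state, agent, best_u)      # execute first action only
    return state

rng = np.random.default_rng(0)
final = turn_taking_mpc(np.zeros(2), ["arm", "sling", "wheelchair"],
                        horizon=5, n_samples=32,
                        dynamics=lambda s, a, u: s + 0.1 * u,
                        cost=lambda s: float(np.abs(s - 1.0).sum()), rng=rng)
print(final)
</code></pre>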
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.10484">arXiv:2501.10484</a> <span> [<a href="https://arxiv.org/pdf/2501.10484">pdf</a>, <a href="https://arxiv.org/format/2501.10484">other</a>] </span> </p>
<div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div>
<p class="title is-5 mathjax"> Bias in Decision-Making for AI's Ethical Dilemmas: A Comparative Study of ChatGPT and Claude </p>
<p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yan%2C+Y">Yile Yan</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Y">Yuqi Zhu</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+W">Wentao Xu</a> </p>
<p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Recent advances in Large Language Models (LLMs) have enabled human-like responses across various tasks, raising questions about their ethical decision-making capabilities and potential biases. This study investigates protected attributes in LLMs through a systematic evaluation of their responses to ethical dilemmas. Using two prominent models - GPT-3.5 Turbo and Claude 3.5 Sonnet - we analyzed their decision-making patterns across multiple protected attributes including age, gender, race, appearance, and disability status. Through 11,200 experimental trials involving both single-factor and two-factor protected-attribute combinations, we evaluated the models' ethical preferences, sensitivity, stability, and clustering of preferences. Our findings reveal significant protected-attribute biases in both models, with consistent preferences for certain features (e.g., "good-looking") and systematic neglect of others. Notably, while GPT-3.5 Turbo showed stronger preferences aligned with traditional power structures, Claude 3.5 Sonnet demonstrated more diverse protected-attribute choices. We also found that ethical sensitivity significantly decreases in more complex scenarios involving multiple protected attributes. Additionally, linguistic referents heavily influence the models' ethical evaluations, as demonstrated by differing responses to racial descriptors (e.g., "Yellow" versus "Asian"). These findings highlight critical concerns about the potential impact of LLM biases in autonomous decision-making systems and emphasize the need for careful consideration of protected attributes in AI development. Our study contributes to the growing body of research on AI ethics by providing a systematic framework for evaluating protected attributes in LLMs' ethical decision-making capabilities. </span> </p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li>
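<p class="is-size-7">Enumerating single- and two-factor protected-attribute conditions, as in the trial design described above, is a small combinatorics exercise; the attribute values below are placeholders, not the study's actual stimuli or counts.</p>
<pre><code class="language-python">
# Sketch of enumerating single- and two-factor trial conditions.
# Attribute names and values are placeholders for illustration.
from itertools import combinations, product

attributes = {
    "age": ["young", "elderly"],
    "gender": ["male", "female"],
    "race": ["Asian", "Black", "White"],
}

single = [(name, value) for name, values in attributes.items() for value in values]
two_factor = [
    ((a, va), (b, vb))
    for a, b in combinations(attributes, 2)          # unordered attribute pairs
    for va, vb in product(attributes[a], attributes[b])
]
print(len(single), len(two_factor))  # 7 single conditions, 16 two-factor pairs
</code></pre>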
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.10282">arXiv:2501.10282</a> <span> [<a href="https://arxiv.org/pdf/2501.10282">pdf</a>, <a href="https://arxiv.org/format/2501.10282">other</a>] </span> </p>
<div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> </div> </div>
<p class="title is-5 mathjax"> Computational Protein Science in the Era of Large Language Models (LLMs) </p>
<p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Fan%2C+W">Wenqi Fan</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Y">Yi Zhou</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shijie Wang</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+Y">Yuyao Yan</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+H">Hui Liu</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+Q">Qian Zhao</a>, <a href="/search/cs?searchtype=author&query=Song%2C+L">Le Song</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Q">Qing Li</a> </p>
<p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Considering the significance of proteins, computational protein science has always been a critical scientific field, dedicated to revealing knowledge and developing applications within the protein sequence-structure-function paradigm. In the last few decades, Artificial Intelligence (AI) has made significant impacts in computational protein science, leading to notable successes in specific protein modeling tasks. However, those previous AI models still meet limitations, such as the difficulty in comprehending the semantics of protein sequences, and the inability to generalize across a wide range of protein modeling tasks. Recently, LLMs have emerged as a milestone in AI due to their unprecedented language processing and generalization capability. They can promote comprehensive progress in entire fields rather than solving individual tasks. As a result, researchers have actively introduced LLM techniques in computational protein science, developing protein Language Models (pLMs) that skillfully grasp the foundational knowledge of proteins and can be effectively generalized to solve a diversity of sequence-structure-function reasoning problems. While witnessing prosperous developments, it is necessary to present a systematic overview of computational protein science empowered by LLM techniques. First, we summarize existing pLMs into categories based on their mastered protein knowledge, i.e., underlying sequence patterns, explicit structural and functional information, and external scientific languages. Second, we introduce the utilization and adaptation of pLMs, highlighting their remarkable achievements in promoting protein structure prediction, protein function prediction, and protein design studies. Then, we describe the practical application of pLMs in antibody design, enzyme design, and drug discovery. Finally, we specifically discuss the promising future directions in this fast-growing field. </span> </p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025.
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.09686">arXiv:2501.09686</a> <span> [<a href="https://arxiv.org/pdf/2501.09686">pdf</a>, <a href="https://arxiv.org/format/2501.09686">other</a>] </span> </p>
<div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div>
<p class="title is-5 mathjax"> Towards Large Reasoning Models: A Survey of Reinforced Reasoning with Large Language Models </p>
<p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xu%2C+F">Fengli Xu</a>, <a href="/search/cs?searchtype=author&query=Hao%2C+Q">Qianyue Hao</a>, <a href="/search/cs?searchtype=author&query=Zong%2C+Z">Zefang Zong</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jingwei Wang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yunke Zhang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jingyi Wang</a>, <a href="/search/cs?searchtype=author&query=Lan%2C+X">Xiaochong Lan</a>, <a href="/search/cs?searchtype=author&query=Gong%2C+J">Jiahui Gong</a>, <a href="/search/cs?searchtype=author&query=Ouyang%2C+T">Tianjian Ouyang</a>, <a href="/search/cs?searchtype=author&query=Meng%2C+F">Fanjin Meng</a>, <a href="/search/cs?searchtype=author&query=Shao%2C+C">Chenyang Shao</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+Y">Yuwei Yan</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Q">Qinglong Yang</a>, <a href="/search/cs?searchtype=author&query=Song%2C+Y">Yiwen Song</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+S">Sijian Ren</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+X">Xinyuan Hu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yu Li</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+J">Jie Feng</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+C">Chen Gao</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yong Li</a> </p>
<p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Language has long been conceived as an essential tool for human reasoning. The breakthrough of Large Language Models (LLMs) has sparked significant research interest in leveraging these models to tackle complex reasoning tasks.
Researchers have moved beyond simple autoregressive token generation by introducing the concept of "thought" -- a sequence of tokens representing intermediate steps in the reasoning process. This innovative paradigm enables LLMs to mimic complex human reasoning processes, such as tree search and reflective thinking. Recently, an emerging trend of learning to reason has applied reinforcement learning (RL) to train LLMs to master reasoning processes. This approach enables the automatic generation of high-quality reasoning trajectories through trial-and-error search algorithms, significantly expanding LLMs' reasoning capacity by providing substantially more training data. Furthermore, recent studies demonstrate that encouraging LLMs to "think" with more tokens during test-time inference can further boost reasoning accuracy significantly. Therefore, train-time and test-time scaling combine to open a new research frontier -- a path toward Large Reasoning Models. The introduction of OpenAI's o1 series marks a significant milestone in this research direction. In this survey, we present a comprehensive review of recent progress in LLM reasoning. We begin by introducing the foundational background of LLMs and then explore the key technical components driving the development of large reasoning models, with a focus on automated data construction, learning-to-reason techniques, and test-time scaling. We also analyze popular open-source projects aimed at building large reasoning models, and conclude with open challenges and future research directions. </span> </p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p>
<p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">36 pages, 5 figures</span> </p> </li>
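<p class="is-size-7">One concrete, widely used instance of the test-time scaling this survey covers is self-consistency: sample several reasoning traces and majority-vote their final answers. In the sketch below, sample_answer is a stand-in for an LLM call; the stub model is purely for demonstration.</p>
<pre><code class="language-python">
# Minimal self-consistency / majority-voting sketch, one common form
# of "thinking with more tokens" at test time. Not tied to any
# specific model from the surveyed literature.
import random
from collections import Counter

def self_consistency(question, sample_answer, n_samples=16):
    votes = Counter(sample_answer(question) for _ in range(n_samples))
    answer, count = votes.most_common(1)[0]
    return answer, count / n_samples       # answer plus empirical agreement

demo = lambda q: random.choice(["42", "42", "42", "41"])  # stub "model"
print(self_consistency("What is 6 x 7?", demo))
</code></pre>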
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">36 pages, 5 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.06271">arXiv:2501.06271</a> <span> [<a href="https://arxiv.org/pdf/2501.06271">pdf</a>, <a href="https://arxiv.org/format/2501.06271">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> </div> </div> <p class="title is-5 mathjax"> Large Language Models for Bioinformatics </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ruan%2C+W">Wei Ruan</a>, <a href="/search/cs?searchtype=author&query=Lyu%2C+Y">Yanjun Lyu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jing Zhang</a>, <a href="/search/cs?searchtype=author&query=Cai%2C+J">Jiazhang Cai</a>, <a href="/search/cs?searchtype=author&query=Shu%2C+P">Peng Shu</a>, <a href="/search/cs?searchtype=author&query=Ge%2C+Y">Yang Ge</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+Y">Yao Lu</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+S">Shang Gao</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yue Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+P">Peilong Wang</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+L">Lin Zhao</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+T">Tao Wang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yufang Liu</a>, <a href="/search/cs?searchtype=author&query=Fang%2C+L">Luyang Fang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Ziyu Liu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zhengliang Liu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yiwei Li</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z">Zihao Wu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+J">Junhao Chen</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+H">Hanqi Jiang</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+Y">Yi Pan</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Z">Zhenyuan Yang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+J">Jingyuan Chen</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+S">Shizhe Liang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+W">Wei Zhang</a> , et al. (30 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.06271v1-abstract-short" style="display: inline;"> With the rapid advancements in large language model (LLM) technology and the emergence of bioinformatics-specific language models (BioLMs), there is a growing need for a comprehensive analysis of the current landscape, computational characteristics, and diverse applications. 
This survey aims to address this need by providing a thorough review of BioLMs, focusing on their evolution, classification, and distinguishing features, alongside a detailed examination of training methodologies, datasets, and evaluation frameworks. We explore the wide-ranging applications of BioLMs in critical areas such as disease diagnosis, drug discovery, and vaccine development, highlighting their impact and transformative potential in bioinformatics. We identify key challenges and limitations inherent in BioLMs, including data privacy and security concerns, interpretability issues, biases in training data and model outputs, and domain adaptation complexities. Finally, we highlight emerging trends and future directions, offering valuable insights to guide researchers and clinicians toward advancing BioLMs for increasingly sophisticated biological and clinical applications. </span> </p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">64 pages, 1 figure</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Yan%2C+Y&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Yan%2C+Y&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Yan%2C+Y&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Yan%2C+Y&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Yan%2C+Y&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Yan%2C+Y&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">…</span></li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a 
href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>