Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/>  <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b">  <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\$","\$"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a>  <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div>  <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div>  <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 10,917 results for author: <span class="mathjax">Zhang, H</span> </h1> </div> <div class="level-right is-hidden-mobile">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> <div class="content"> <form method="GET" action="/search/" aria-role="search"> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Zhang, H"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Zhang%2C+H&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Zhang, H"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Zhang%2C+H&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Zhang%2C+H&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Zhang%2C+H&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Zhang%2C+H&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Zhang%2C+H&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Zhang%2C+H&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">…</span></li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.13923">arXiv:2502.13923</a> <span> [<a href="https://arxiv.org/pdf/2502.13923">pdf</a>, <a href="https://arxiv.org/format/2502.13923">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Qwen2.5-VL Technical Report </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Bai%2C+S">Shuai Bai</a>, <a href="/search/?searchtype=author&query=Chen%2C+K">Keqin Chen</a>, <a href="/search/?searchtype=author&query=Liu%2C+X">Xuejing Liu</a>, <a href="/search/?searchtype=author&query=Wang%2C+J">Jialin Wang</a>, <a href="/search/?searchtype=author&query=Ge%2C+W">Wenbin Ge</a>, <a href="/search/?searchtype=author&query=Song%2C+S">Sibo Song</a>, <a href="/search/?searchtype=author&query=Dang%2C+K">Kai Dang</a>, <a href="/search/?searchtype=author&query=Wang%2C+P">Peng Wang</a>, <a href="/search/?searchtype=author&query=Wang%2C+S">Shijie Wang</a>, <a href="/search/?searchtype=author&query=Tang%2C+J">Jun Tang</a>, <a href="/search/?searchtype=author&query=Zhong%2C+H">Humen Zhong</a>, <a href="/search/?searchtype=author&query=Zhu%2C+Y">Yuanzhi Zhu</a>, <a href="/search/?searchtype=author&query=Yang%2C+M">Mingkun Yang</a>, <a href="/search/?searchtype=author&query=Li%2C+Z">Zhaohai Li</a>, <a href="/search/?searchtype=author&query=Wan%2C+J">Jianqiang Wan</a>, <a href="/search/?searchtype=author&query=Wang%2C+P">Pengfei Wang</a>, <a href="/search/?searchtype=author&query=Ding%2C+W">Wei Ding</a>, <a href="/search/?searchtype=author&query=Fu%2C+Z">Zheren Fu</a>, <a href="/search/?searchtype=author&query=Xu%2C+Y">Yiheng Xu</a>, <a href="/search/?searchtype=author&query=Ye%2C+J">Jiabo Ye</a>, <a href="/search/?searchtype=author&query=Zhang%2C+X">Xi Zhang</a>, <a href="/search/?searchtype=author&query=Xie%2C+T">Tianbao Xie</a>, <a href="/search/?searchtype=author&query=Cheng%2C+Z">Zesen Cheng</a>, <a href="/search/?searchtype=author&query=Zhang%2C+H">Hang Zhang</a>, <a href="/search/?searchtype=author&query=Yang%2C+Z">Zhibo Yang</a> , et al. (2 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.13923v1-abstract-short" style="display: inline;"> We introduce Qwen2.5-VL, the latest flagship model of Qwen vision-language series, which demonstrates significant advancements in both foundational capabilities and innovative functionalities. Qwen2.5-VL achieves a major leap forward in understanding and interacting with the world through enhanced visual recognition, precise object localization, robust document parsing, and long-video comprehensio… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13923v1-abstract-full').style.display = 'inline'; document.getElementById('2502.13923v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.13923v1-abstract-full" style="display: none;"> We introduce Qwen2.5-VL, the latest flagship model of Qwen vision-language series, which demonstrates significant advancements in both foundational capabilities and innovative functionalities. Qwen2.5-VL achieves a major leap forward in understanding and interacting with the world through enhanced visual recognition, precise object localization, robust document parsing, and long-video comprehension. A standout feature of Qwen2.5-VL is its ability to localize objects using bounding boxes or points accurately. It provides robust structured data extraction from invoices, forms, and tables, as well as detailed analysis of charts, diagrams, and layouts. To handle complex inputs, Qwen2.5-VL introduces dynamic resolution processing and absolute time encoding, enabling it to process images of varying sizes and videos of extended durations (up to hours) with second-level event localization. This allows the model to natively perceive spatial scales and temporal dynamics without relying on traditional normalization techniques. By training a native dynamic-resolution Vision Transformer (ViT) from scratch and incorporating Window Attention, we reduce computational overhead while maintaining native resolution. As a result, Qwen2.5-VL excels not only in static image and document understanding but also as an interactive visual agent capable of reasoning, tool usage, and task execution in real-world scenarios such as operating computers and mobile devices. Qwen2.5-VL is available in three sizes, addressing diverse use cases from edge AI to high-performance computing. The flagship Qwen2.5-VL-72B model matches state-of-the-art models like GPT-4o and Claude 3.5 Sonnet, particularly excelling in document and diagram understanding. Additionally, Qwen2.5-VL maintains robust linguistic performance, preserving the core language competencies of the Qwen2.5 LLM. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13923v1-abstract-full').style.display = 'none'; document.getElementById('2502.13923v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.13817">arXiv:2502.13817</a> <span> [<a href="https://arxiv.org/pdf/2502.13817">pdf</a>, <a href="https://arxiv.org/format/2502.13817">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Plasma Physics">physics.plasm-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Accelerator Physics">physics.acc-ph</span> </div> </div> <p class="title is-5 mathjax"> Relativistic Leaky Surface Plasmon Accelerator in Nanostructured Carbon Nanotube Forest </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Lei%2C+B">Bifeng Lei</a>, <a href="/search/?searchtype=author&query=Zhang%2C+H">Hao Zhang</a>, <a href="/search/?searchtype=author&query=Bontoiu%2C+C">Cristian Bontoiu</a>, <a href="/search/?searchtype=author&query=Bonatto%2C+A">Alexandre Bonatto</a>, <a href="/search/?searchtype=author&query=Martin-Luna%2C+P">Pablo Martin-Luna</a>, <a href="/search/?searchtype=author&query=Liu%2C+B">Bin Liu</a>, <a href="/search/?searchtype=author&query=Resta-Lopez%2C+J">Javier Resta-Lopez</a>, <a href="/search/?searchtype=author&query=Xia%2C+G">Guoxing Xia</a>, <a href="/search/?searchtype=author&query=Welsch%2C+C">Carsten Welsch</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.13817v1-abstract-short" style="display: inline;"> In quantum-degenerate metallic carbon nanotubes (CNTs), the conduction electron gas forms a solid-state plasma environment where collective dynamics are dominant. This dense plasma is capable of supporting the excitation of surface plasmon on the CNT surface. In this study, we demonstrate a new class of microscale relativistic surface plasmons (RSPs) that, independent of laser polarisation and mat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13817v1-abstract-full').style.display = 'inline'; document.getElementById('2502.13817v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.13817v1-abstract-full" style="display: none;"> In quantum-degenerate metallic carbon nanotubes (CNTs), the conduction electron gas forms a solid-state plasma environment where collective dynamics are dominant. This dense plasma is capable of supporting the excitation of surface plasmon on the CNT surface. In this study, we demonstrate a new class of microscale relativistic surface plasmons (RSPs) that, independent of laser polarisation and matching medium, can be directly excited by paraxial propagation of a high-intensity optical laser pulse through a cylindrical vacuum channel nanostructured in a CNT forest. The leaky field of the resonant RSP mode can support a non-evanescent longitudinal component with amplitude up to several TV/m-levels and a phase-matched focusing field. This field enables the sustained acceleration of relativistic electrons and positrons over long distances. We also investigate two possible mechanisms for electron injection, including edge injection and self-injection, both of which are feasible with current laser facilities, paving the way for experimental realisation. As well as highlighting a promising avenue for the development of ultra-compact, high-energy solid-state plasma particle accelerators, this work also opens up the potential for high-energy plasmonics. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13817v1-abstract-full').style.display = 'none'; document.getElementById('2502.13817v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.13782">arXiv:2502.13782</a> <span> [<a href="https://arxiv.org/pdf/2502.13782">pdf</a>, <a href="https://arxiv.org/format/2502.13782">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="General Relativity and Quantum Cosmology">gr-qc</span> </div> </div> <p class="title is-5 mathjax"> Misner-Sharp Energy and P-V Criticality in Quasi-Topological Cosmology </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Chu%2C+Y">Yue Chu</a>, <a href="/search/?searchtype=author&query=Kong%2C+S">Shi-Bei Kong</a>, <a href="/search/?searchtype=author&query=Liu%2C+Y">Yang Liu</a>, <a href="/search/?searchtype=author&query=Zhang%2C+H">Hongsheng Zhang</a>, <a href="/search/?searchtype=author&query=Hu%2C+Y">Ya-Peng Hu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.13782v1-abstract-short" style="display: inline;"> We presented a sound foundation of thermodynamics for a Friedmann-Robertson-Walker (FRW) universe from the first principle in ground-breaking work [Hu et al., JHEP12 (2022) 168]. Based on such an approach, we explore the thermodynamics of cosmology in quasi-topology gravity. Starting from the unified first law, we first obtain the well-defined Misner-Sharp energy in quasi-topology cosmology. We de… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13782v1-abstract-full').style.display = 'inline'; document.getElementById('2502.13782v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.13782v1-abstract-full" style="display: none;"> We presented a sound foundation of thermodynamics for a Friedmann-Robertson-Walker (FRW) universe from the first principle in ground-breaking work [Hu et al., JHEP12 (2022) 168]. Based on such an approach, we explore the thermodynamics of cosmology in quasi-topology gravity. Starting from the unified first law, we first obtain the well-defined Misner-Sharp energy in quasi-topology cosmology. We demonstrate that the Misner-Sharp energy is equal to $蟻V$ inside the apparent horizon. Further, the unified first law requires extra terms for generalized force and conjugate generalized position, which are identified as thermodynamic pressure and thermodynamic volume, respectively. Hence we naturally derive the equation of state of the FRW universe in quasi-topology gravity, and show that it undergoes $P$-$V$ phase transitions. We calculate the critical exponents for the phase transition, which may be beneficial to probe the micro theory of quasi-topology gravity. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13782v1-abstract-full').style.display = 'none'; document.getElementById('2502.13782v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages, 1 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.13723">arXiv:2502.13723</a> <span> [<a href="https://arxiv.org/pdf/2502.13723">pdf</a>, <a href="https://arxiv.org/format/2502.13723">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Direct Value Optimization: Improving Chain-of-Thought Reasoning in LLMs with Refined Values </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Zhang%2C+H">Hongbo Zhang</a>, <a href="/search/?searchtype=author&query=Cui%2C+H">Han Cui</a>, <a href="/search/?searchtype=author&query=Bao%2C+G">Guangsheng Bao</a>, <a href="/search/?searchtype=author&query=Yang%2C+L">Linyi Yang</a>, <a href="/search/?searchtype=author&query=Wang%2C+J">Jun Wang</a>, <a href="/search/?searchtype=author&query=Zhang%2C+Y">Yue Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.13723v1-abstract-short" style="display: inline;"> We introduce Direct Value Optimization (DVO), an innovative reinforcement learning framework for enhancing large language models in complex reasoning tasks. Unlike traditional methods relying on preference labels, DVO utilizes value signals at individual reasoning steps, optimizing models via a mean squared error loss. The key benefit of DVO lies in its fine-grained supervision, circumventing the… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13723v1-abstract-full').style.display = 'inline'; document.getElementById('2502.13723v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.13723v1-abstract-full" style="display: none;"> We introduce Direct Value Optimization (DVO), an innovative reinforcement learning framework for enhancing large language models in complex reasoning tasks. Unlike traditional methods relying on preference labels, DVO utilizes value signals at individual reasoning steps, optimizing models via a mean squared error loss. The key benefit of DVO lies in its fine-grained supervision, circumventing the need for labor-intensive human annotations. Target values within the DVO are estimated using either Monte Carlo Tree Search or an outcome value model. Our empirical analysis on both mathematical and commonsense reasoning tasks shows that DVO consistently outperforms existing offline preference optimization techniques, even with fewer training steps. These findings underscore the importance of value signals in advancing reasoning capabilities and highlight DVO as a superior methodology under scenarios lacking explicit human preference information. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13723v1-abstract-full').style.display = 'none'; document.getElementById('2502.13723v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">preprint</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.13694">arXiv:2502.13694</a> <span> [<a href="https://arxiv.org/pdf/2502.13694">pdf</a>, <a href="https://arxiv.org/format/2502.13694">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Numerical Analysis">math.NA</span> </div> </div> <p class="title is-5 mathjax"> Natural damping of time-harmonic waves and its influence on Schwarz methods </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Gander%2C+M+J">Martin J. Gander</a>, <a href="/search/?searchtype=author&query=Zhang%2C+H">Hui Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.13694v1-abstract-short" style="display: inline;"> The influence of various damping on the performance of Schwarz methods for time-harmonic waves is visualized by Fourier analysis. </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.13694v1-abstract-full" style="display: none;"> The influence of various damping on the performance of Schwarz methods for time-harmonic waves is visualized by Fourier analysis. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13694v1-abstract-full').style.display = 'none'; document.getElementById('2502.13694v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, submitted to the 28th International Conference on Domain Decomposition Methods</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.13562">arXiv:2502.13562</a> <span> [<a href="https://arxiv.org/pdf/2502.13562">pdf</a>, <a href="https://arxiv.org/format/2502.13562">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Are Large Language Models In-Context Graph Learners? </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Li%2C+J">Jintang Li</a>, <a href="/search/?searchtype=author&query=Wu%2C+R">Ruofan Wu</a>, <a href="/search/?searchtype=author&query=Zhu%2C+Y">Yuchang Zhu</a>, <a href="/search/?searchtype=author&query=Zhang%2C+H">Huizhe Zhang</a>, <a href="/search/?searchtype=author&query=Chen%2C+L">Liang Chen</a>, <a href="/search/?searchtype=author&query=Zheng%2C+Z">Zibin Zheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.13562v1-abstract-short" style="display: inline;"> Large language models (LLMs) have demonstrated remarkable in-context reasoning capabilities across a wide range of tasks, particularly with unstructured inputs such as language or images. However, LLMs struggle to handle structured data, such as graphs, due to their lack of understanding of non-Euclidean structures. As a result, without additional fine-tuning, their performance significantly lags… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13562v1-abstract-full').style.display = 'inline'; document.getElementById('2502.13562v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.13562v1-abstract-full" style="display: none;"> Large language models (LLMs) have demonstrated remarkable in-context reasoning capabilities across a wide range of tasks, particularly with unstructured inputs such as language or images. However, LLMs struggle to handle structured data, such as graphs, due to their lack of understanding of non-Euclidean structures. As a result, without additional fine-tuning, their performance significantly lags behind that of graph neural networks (GNNs) in graph learning tasks. In this paper, we show that learning on graph data can be conceptualized as a retrieval-augmented generation (RAG) process, where specific instances (e.g., nodes or edges) act as queries, and the graph itself serves as the retrieved context. Building on this insight, we propose a series of RAG frameworks to enhance the in-context learning capabilities of LLMs for graph learning tasks. Comprehensive evaluations demonstrate that our proposed RAG frameworks significantly improve LLM performance on graph-based tasks, particularly in scenarios where a pretrained LLM must be used without modification or accessed via an API. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13562v1-abstract-full').style.display = 'none'; document.getElementById('2502.13562v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Preprint, under review</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.13540">arXiv:2502.13540</a> <span> [<a href="https://arxiv.org/pdf/2502.13540">pdf</a>, <a href="https://arxiv.org/format/2502.13540">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="High Energy Physics - Experiment">hep-ex</span> </div> </div> <p class="title is-5 mathjax"> Amplitude analysis of $蠄(3686)\to 纬K_S^0 K_S^0 $ </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=BESIII+Collaboration"> BESIII Collaboration</a>, <a href="/search/?searchtype=author&query=Ablikim%2C+M">M. Ablikim</a>, <a href="/search/?searchtype=author&query=Achasov%2C+M+N">M. N. Achasov</a>, <a href="/search/?searchtype=author&query=Adlarson%2C+P">P. Adlarson</a>, <a href="/search/?searchtype=author&query=Ai%2C+X+C">X. C. Ai</a>, <a href="/search/?searchtype=author&query=Aliberti%2C+R">R. Aliberti</a>, <a href="/search/?searchtype=author&query=Amoroso%2C+A">A. Amoroso</a>, <a href="/search/?searchtype=author&query=An%2C+Q">Q. An</a>, <a href="/search/?searchtype=author&query=Bai%2C+Y">Y. Bai</a>, <a href="/search/?searchtype=author&query=Bakina%2C+O">O. Bakina</a>, <a href="/search/?searchtype=author&query=Ban%2C+Y">Y. Ban</a>, <a href="/search/?searchtype=author&query=Bao%2C+H+-">H. -R. Bao</a>, <a href="/search/?searchtype=author&query=Batozskaya%2C+V">V. Batozskaya</a>, <a href="/search/?searchtype=author&query=Begzsuren%2C+K">K. Begzsuren</a>, <a href="/search/?searchtype=author&query=Berger%2C+N">N. Berger</a>, <a href="/search/?searchtype=author&query=Berlowski%2C+M">M. Berlowski</a>, <a href="/search/?searchtype=author&query=Bertani%2C+M">M. Bertani</a>, <a href="/search/?searchtype=author&query=Bettoni%2C+D">D. Bettoni</a>, <a href="/search/?searchtype=author&query=Bianchi%2C+F">F. Bianchi</a>, <a href="/search/?searchtype=author&query=Bianco%2C+E">E. Bianco</a>, <a href="/search/?searchtype=author&query=Bortone%2C+A">A. Bortone</a>, <a href="/search/?searchtype=author&query=Boyko%2C+I">I. Boyko</a>, <a href="/search/?searchtype=author&query=Briere%2C+R+A">R. A. Briere</a>, <a href="/search/?searchtype=author&query=Brueggemann%2C+A">A. Brueggemann</a>, <a href="/search/?searchtype=author&query=Cai%2C+H">H. Cai</a> , et al. (704 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.13540v1-abstract-short" style="display: inline;"> Using $(2712\pm14)\times10^6$ $蠄(3686)$ events collected with the BESIII detector, we perform the first amplitude analysis of the radiative decay $蠄(3686)\to 纬K_S^0 K_S^0$ within the mass region $M_{K_S^0 K_S^0 }<2.8$ GeV/$c^2$. Employing a one-channel K-matrix approach for the description of the dynamics of the $K^0_S K^0_S$ system, the data sample is well described with four poles for the $f_0$-… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13540v1-abstract-full').style.display = 'inline'; document.getElementById('2502.13540v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.13540v1-abstract-full" style="display: none;"> Using $(2712\pm14)\times10^6$ $蠄(3686)$ events collected with the BESIII detector, we perform the first amplitude analysis of the radiative decay $蠄(3686)\to 纬K_S^0 K_S^0$ within the mass region $M_{K_S^0 K_S^0 }<2.8$ GeV/$c^2$. Employing a one-channel K-matrix approach for the description of the dynamics of the $K^0_S K^0_S$ system, the data sample is well described with four poles for the $f_0$-wave and three poles for the $f_2$-wave. The determined pole positions are consistent with those of well-established resonance states. The observed $f_0$ and $f_{2}$ states are found to be qualitatively consistent with those produced in radiative $J/蠄$ decays, indicating the similarity between the two charmonium states in their radiative decays. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13540v1-abstract-full').style.display = 'none'; document.getElementById('2502.13540v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">20 pages, 4 figures, submitted to JHEP</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.13354">arXiv:2502.13354</a> <span> [<a href="https://arxiv.org/pdf/2502.13354">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Superconductivity">cond-mat.supr-con</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Materials Science">cond-mat.mtrl-sci</span> </div> </div> <p class="title is-5 mathjax"> Origin of the tiny energy gap and Dirac points in monoclinic trilayer nickelate La$_4$Ni$_3$O$_{10}$ </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Zhang%2C+H">Hu Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.13354v1-abstract-short" style="display: inline;"> Superconductivity was recently found in trilayer nickelate La$_4$Ni$_3$O$_{10}$ under high pressure with a phase transition from the monoclinic P2$_1$/a structure to the tetragonal I4/mmm structure. Previous experimental works have confirmed the existence of a tiny energy gap formed with Ni 3d$_{z^2}$ orbitals in monoclinic La$_4$Ni$_3$O$_{10}$. Here we investigate the physical origin of this gap… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13354v1-abstract-full').style.display = 'inline'; document.getElementById('2502.13354v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.13354v1-abstract-full" style="display: none;"> Superconductivity was recently found in trilayer nickelate La$_4$Ni$_3$O$_{10}$ under high pressure with a phase transition from the monoclinic P2$_1$/a structure to the tetragonal I4/mmm structure. Previous experimental works have confirmed the existence of a tiny energy gap formed with Ni 3d$_{z^2}$ orbitals in monoclinic La$_4$Ni$_3$O$_{10}$. Here we investigate the physical origin of this gap by analyzing symmetry properties of energy bands based on the group theory. The tiny gap comes from energy bands with opposite parity at the Brillouin zone center. In addition, we also find previously unknown Dirac points in some momentum directions around the Fermi level. An effective Hamiltonian is constructed to describe low energy physics of the tiny energy gap and Dirac points. Due to the low crystal symmetry of monoclinic La$_4$Ni$_3$O$_{10}$, its energy bands display strong anisotropic properties. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13354v1-abstract-full').style.display = 'none'; document.getElementById('2502.13354v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages 5 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.13352">arXiv:2502.13352</a> <span> [<a href="https://arxiv.org/pdf/2502.13352">pdf</a>, <a href="https://arxiv.org/format/2502.13352">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Emerging Technologies">cs.ET</span> </div> </div> <p class="title is-5 mathjax"> Integrated Sensing and Communication for 6G Holographic Digital Twins </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Zhang%2C+H">Haijun Zhang</a>, <a href="/search/?searchtype=author&query=Zhang%2C+Z">Ziyang Zhang</a>, <a href="/search/?searchtype=author&query=Liu%2C+X">Xiangnan Liu</a>, <a href="/search/?searchtype=author&query=Li%2C+W">Wei Li</a>, <a href="/search/?searchtype=author&query=Li%2C+H">Haojin Li</a>, <a href="/search/?searchtype=author&query=Sun%2C+C">Chen Sun</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.13352v1-abstract-short" style="display: inline;"> With the advent of 6G networks, offering ultra-high bandwidth and ultra-low latency, coupled with the enhancement of terminal device resolutions, holographic communication is gradually becoming a reality. Holographic digital twin (HDT) is considered one of key applications of holographic communication, capable of creating virtual replicas for real-time mapping and prediction of physical entity sta… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13352v1-abstract-full').style.display = 'inline'; document.getElementById('2502.13352v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.13352v1-abstract-full" style="display: none;"> With the advent of 6G networks, offering ultra-high bandwidth and ultra-low latency, coupled with the enhancement of terminal device resolutions, holographic communication is gradually becoming a reality. Holographic digital twin (HDT) is considered one of key applications of holographic communication, capable of creating virtual replicas for real-time mapping and prediction of physical entity states, and performing three-dimensional reproduction of spatial information. In this context, integrated sensing and communication (ISAC) is expected to be a crucial pathway for providing data sources to HDT. This paper proposes a four-layer architecture assisted by ISAC for HDT, integrating emerging paradigms and key technologies to achieve low-cost, high-precision environmental data collection for constructing HDT. Specifically, to enhance sensing resolution, we explore super-resolution techniques from the perspectives of parameter estimation and point cloud construction. Additionally, we focus on multi-point collaborative sensing for constructing HDT, and provide a comprehensive review of four key techniques: node selection, multi-band collaboration, cooperative beamforming, and data fusion. Finally, we highlight several interesting research directions to guide and inspire future work. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13352v1-abstract-full').style.display = 'none'; document.getElementById('2502.13352v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.13131">arXiv:2502.13131</a> <span> [<a href="https://arxiv.org/pdf/2502.13131">pdf</a>, <a href="https://arxiv.org/format/2502.13131">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Rethinking Diverse Human Preference Learning through Principal Component Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Luo%2C+F">Feng Luo</a>, <a href="/search/?searchtype=author&query=Yang%2C+R">Rui Yang</a>, <a href="/search/?searchtype=author&query=Sun%2C+H">Hao Sun</a>, <a href="/search/?searchtype=author&query=Deng%2C+C">Chunyuan Deng</a>, <a href="/search/?searchtype=author&query=Yao%2C+J">Jiarui Yao</a>, <a href="/search/?searchtype=author&query=Shen%2C+J">Jingyan Shen</a>, <a href="/search/?searchtype=author&query=Zhang%2C+H">Huan Zhang</a>, <a href="/search/?searchtype=author&query=Chen%2C+H">Hanjie Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.13131v1-abstract-short" style="display: inline;"> Understanding human preferences is crucial for improving foundation models and building personalized AI systems. However, preferences are inherently diverse and complex, making it difficult for traditional reward models to capture their full range. While fine-grained preference data can help, collecting it is expensive and hard to scale. In this paper, we introduce Decomposed Reward Models (DRMs),… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13131v1-abstract-full').style.display = 'inline'; document.getElementById('2502.13131v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.13131v1-abstract-full" style="display: none;"> Understanding human preferences is crucial for improving foundation models and building personalized AI systems. However, preferences are inherently diverse and complex, making it difficult for traditional reward models to capture their full range. While fine-grained preference data can help, collecting it is expensive and hard to scale. In this paper, we introduce Decomposed Reward Models (DRMs), a novel approach that extracts diverse human preferences from binary comparisons without requiring fine-grained annotations. Our key insight is to represent human preferences as vectors and analyze them using Principal Component Analysis (PCA). By constructing a dataset of embedding differences between preferred and rejected responses, DRMs identify orthogonal basis vectors that capture distinct aspects of preference. These decomposed rewards can be flexibly combined to align with different user needs, offering an interpretable and scalable alternative to traditional reward models. We demonstrate that DRMs effectively extract meaningful preference dimensions (e.g., helpfulness, safety, humor) and adapt to new users without additional training. Our results highlight DRMs as a powerful framework for personalized and interpretable LLM alignment. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13131v1-abstract-full').style.display = 'none'; document.getElementById('2502.13131v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.13092">arXiv:2502.13092</a> <span> [<a href="https://arxiv.org/pdf/2502.13092">pdf</a>, <a href="https://arxiv.org/format/2502.13092">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Text2World: Benchmarking Large Language Models for Symbolic World Model Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Hu%2C+M">Mengkang Hu</a>, <a href="/search/?searchtype=author&query=Chen%2C+T">Tianxing Chen</a>, <a href="/search/?searchtype=author&query=Zou%2C+Y">Yude Zou</a>, <a href="/search/?searchtype=author&query=Lei%2C+Y">Yuheng Lei</a>, <a href="/search/?searchtype=author&query=Chen%2C+Q">Qiguang Chen</a>, <a href="/search/?searchtype=author&query=Li%2C+M">Ming Li</a>, <a href="/search/?searchtype=author&query=Zhang%2C+H">Hongyuan Zhang</a>, <a href="/search/?searchtype=author&query=Shao%2C+W">Wenqi Shao</a>, <a href="/search/?searchtype=author&query=Luo%2C+P">Ping Luo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.13092v1-abstract-short" style="display: inline;"> Recently, there has been growing interest in leveraging large language models (LLMs) to generate symbolic world models from textual descriptions. Although LLMs have been extensively explored in the context of world modeling, prior studies encountered several challenges, including evaluation randomness, dependence on indirect metrics, and a limited domain scope. To address these limitations, we int… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13092v1-abstract-full').style.display = 'inline'; document.getElementById('2502.13092v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.13092v1-abstract-full" style="display: none;"> Recently, there has been growing interest in leveraging large language models (LLMs) to generate symbolic world models from textual descriptions. Although LLMs have been extensively explored in the context of world modeling, prior studies encountered several challenges, including evaluation randomness, dependence on indirect metrics, and a limited domain scope. To address these limitations, we introduce a novel benchmark, Text2World, based on planning domain definition language (PDDL), featuring hundreds of diverse domains and employing multi-criteria, execution-based metrics for a more robust evaluation. We benchmark current LLMs using Text2World and find that reasoning models trained with large-scale reinforcement learning outperform others. However, even the best-performing model still demonstrates limited capabilities in world modeling. Building on these insights, we examine several promising strategies to enhance the world modeling capabilities of LLMs, including test-time scaling, agent training, and more. We hope that Text2World can serve as a crucial resource, laying the groundwork for future research in leveraging LLMs as world models. The project page is available at https://text-to-world.github.io/. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13092v1-abstract-full').style.display = 'none'; document.getElementById('2502.13092v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Project page: https://text-to-world.github.io/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12961">arXiv:2502.12961</a> <span> [<a href="https://arxiv.org/pdf/2502.12961">pdf</a>, <a href="https://arxiv.org/format/2502.12961">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Adaptive Tool Use in Large Language Models with Meta-Cognition Trigger </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Li%2C+W">Wenjun Li</a>, <a href="/search/?searchtype=author&query=Li%2C+D">Dexun Li</a>, <a href="/search/?searchtype=author&query=Dong%2C+K">Kuicai Dong</a>, <a href="/search/?searchtype=author&query=Zhang%2C+C">Cong Zhang</a>, <a href="/search/?searchtype=author&query=Zhang%2C+H">Hao Zhang</a>, <a href="/search/?searchtype=author&query=Liu%2C+W">Weiwen Liu</a>, <a href="/search/?searchtype=author&query=Wang%2C+Y">Yasheng Wang</a>, <a href="/search/?searchtype=author&query=Tang%2C+R">Ruiming Tang</a>, <a href="/search/?searchtype=author&query=Liu%2C+Y">Yong Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.12961v1-abstract-short" style="display: inline;"> Large language models (LLMs) have shown remarkable emergent capabilities, transforming the execution of functional tasks by leveraging external tools for complex problems that require specialized processing or real-time data. While existing research expands LLMs access to diverse tools (e.g., program interpreters, search engines, weather/map apps), the necessity of using these tools is often overl… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12961v1-abstract-full').style.display = 'inline'; document.getElementById('2502.12961v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.12961v1-abstract-full" style="display: none;"> Large language models (LLMs) have shown remarkable emergent capabilities, transforming the execution of functional tasks by leveraging external tools for complex problems that require specialized processing or real-time data. While existing research expands LLMs access to diverse tools (e.g., program interpreters, search engines, weather/map apps), the necessity of using these tools is often overlooked, leading to indiscriminate tool invocation. This naive approach raises two key issues:(1) increased delays due to unnecessary tool calls, and (2) potential errors resulting from faulty interactions with external tools. In this paper, we introduce meta-cognition as a proxy for LLMs self-assessment of their capabilities, representing the model's awareness of its own limitations. Based on this, we propose MeCo, an adaptive decision-making strategy for external tool use. MeCo quantifies metacognitive scores by capturing high-level cognitive signals in the representation space, guiding when to invoke tools. Notably, MeCo is fine-tuning-free and incurs minimal cost. Our experiments show that MeCo accurately detects LLMs' internal cognitive signals and significantly improves tool-use decision-making across multiple base models and benchmarks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12961v1-abstract-full').style.display = 'none'; document.getElementById('2502.12961v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12834">arXiv:2502.12834</a> <span> [<a href="https://arxiv.org/pdf/2502.12834">pdf</a>, <a href="https://arxiv.org/format/2502.12834">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> NTP-INT: Network Traffic Prediction-Driven In-band Network Telemetry for High-load Switches </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Zhang%2C+P">Penghui Zhang</a>, <a href="/search/?searchtype=author&query=Zhang%2C+H">Hua Zhang</a>, <a href="/search/?searchtype=author&query=Dai%2C+Y">Yuqi Dai</a>, <a href="/search/?searchtype=author&query=Zeng%2C+C">Cheng Zeng</a>, <a href="/search/?searchtype=author&query=Wang%2C+J">Jingyu Wang</a>, <a href="/search/?searchtype=author&query=Liao%2C+J">Jianxin Liao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.12834v1-abstract-short" style="display: inline;"> In-band network telemetry (INT) is essential to network management due to its real-time visibility. However, because of the rapid increase in network devices and services, it has become crucial to have targeted access to detailed network information in a dynamic network environment. This paper proposes an intelligent network telemetry system called NTP-INT to obtain more fine-grained network infor… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12834v1-abstract-full').style.display = 'inline'; document.getElementById('2502.12834v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.12834v1-abstract-full" style="display: none;"> In-band network telemetry (INT) is essential to network management due to its real-time visibility. However, because of the rapid increase in network devices and services, it has become crucial to have targeted access to detailed network information in a dynamic network environment. This paper proposes an intelligent network telemetry system called NTP-INT to obtain more fine-grained network information on high-load switches. Specifically, NTP-INT consists of three modules: network traffic prediction module, network pruning module, and probe path planning module. Firstly, the network traffic prediction module adopts a Multi-Temporal Graph Neural Network (MTGNN) to predict future network traffic and identify high-load switches. Then, we design the network pruning algorithm to generate a subnetwork covering all high-load switches to reduce the complexity of probe path planning. Finally, the probe path planning module uses an attention-mechanism-based deep reinforcement learning (DEL) model to plan efficient probe paths in the network slice. The experimental results demonstrate that NTP-INT can acquire more precise network information on high-load switches while decreasing the control overhead by 50\%. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12834v1-abstract-full').style.display = 'none'; document.getElementById('2502.12834v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12817">arXiv:2502.12817</a> <span> [<a href="https://arxiv.org/pdf/2502.12817">pdf</a>, <a href="https://arxiv.org/format/2502.12817">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> An Attention-Assisted AI Model for Real-Time Underwater Sound Speed Estimation Leveraging Remote Sensing Sea Surface Temperature Data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Wu%2C+P">Pengfei Wu</a>, <a href="/search/?searchtype=author&query=Huang%2C+W">Wei Huang</a>, <a href="/search/?searchtype=author&query=Shi%2C+Y">Yujie Shi</a>, <a href="/search/?searchtype=author&query=Zhang%2C+H">Hao Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.12817v2-abstract-short" style="display: inline;"> The estimation of underwater sound velocity distribution serves as a critical basis for facilitating effective underwater communication and precise positioning, given that variations in sound velocity influence the path of signal transmission. Conventional techniques for the direct measurement of sound velocity, as well as methods that involve the inversion of sound velocity utilizing acoustic fie… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12817v2-abstract-full').style.display = 'inline'; document.getElementById('2502.12817v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.12817v2-abstract-full" style="display: none;"> The estimation of underwater sound velocity distribution serves as a critical basis for facilitating effective underwater communication and precise positioning, given that variations in sound velocity influence the path of signal transmission. Conventional techniques for the direct measurement of sound velocity, as well as methods that involve the inversion of sound velocity utilizing acoustic field data, necessitate on--site data collection. This requirement not only places high demands on device deployment, but also presents challenges in achieving real-time estimation of sound velocity distribution. In order to construct a real-time sound velocity field and eliminate the need for underwater onsite data measurement operations, we propose a self-attention embedded multimodal data fusion convolutional neural network (SA-MDF-CNN) for real-time underwater sound speed profile (SSP) estimation. The proposed model seeks to elucidate the inherent relationship between remote sensing sea surface temperature (SST) data, the primary component characteristics of historical SSPs, and their spatial coordinates. This is achieved by employing CNNs and attention mechanisms to extract local and global correlations from the input data, respectively. The ultimate objective is to facilitate a rapid and precise estimation of sound velocity distribution within a specified task area. Experimental results show that the method proposed in this paper has lower root mean square error (RMSE) and stronger robustness than other state-of-the-art methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12817v2-abstract-full').style.display = 'none'; document.getElementById('2502.12817v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12735">arXiv:2502.12735</a> <span> [<a href="https://arxiv.org/pdf/2502.12735">pdf</a>, <a href="https://arxiv.org/format/2502.12735">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Task-Oriented Semantic Communication for Stereo-Vision 3D Object Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Cao%2C+Z">Zijian Cao</a>, <a href="/search/?searchtype=author&query=Zhang%2C+H">Hua Zhang</a>, <a href="/search/?searchtype=author&query=Liang%2C+L">Le Liang</a>, <a href="/search/?searchtype=author&query=Wang%2C+H">Haotian Wang</a>, <a href="/search/?searchtype=author&query=Jin%2C+S">Shi Jin</a>, <a href="/search/?searchtype=author&query=Li%2C+G+Y">Geoffrey Ye Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.12735v1-abstract-short" style="display: inline;"> With the development of computer vision, 3D object detection has become increasingly important in many real-world applications. Limited by the computing power of sensor-side hardware, the detection task is sometimes deployed on remote computing devices or the cloud to execute complex algorithms, which brings massive data transmission overhead. In response, this paper proposes an optical flow-drive… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12735v1-abstract-full').style.display = 'inline'; document.getElementById('2502.12735v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.12735v1-abstract-full" style="display: none;"> With the development of computer vision, 3D object detection has become increasingly important in many real-world applications. Limited by the computing power of sensor-side hardware, the detection task is sometimes deployed on remote computing devices or the cloud to execute complex algorithms, which brings massive data transmission overhead. In response, this paper proposes an optical flow-driven semantic communication framework for the stereo-vision 3D object detection task. The proposed framework fully exploits the dependence of stereo-vision 3D detection on semantic information in images and prioritizes the transmission of this semantic information to reduce total transmission data sizes while ensuring the detection accuracy. Specifically, we develop an optical flow-driven module to jointly extract and recover semantics from the left and right images to reduce the loss of the left-right photometric alignment semantic information and improve the accuracy of depth inference. Then, we design a 2D semantic extraction module to identify and extract semantic meaning around the objects to enhance the transmission of semantic information in the key areas. Finally, a fusion network is used to fuse the recovered semantics, and reconstruct the stereo-vision images for 3D detection. Simulation results show that the proposed method improves the detection accuracy by nearly 70% and outperforms the traditional method, especially for the low signal-to-noise ratio regime. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12735v1-abstract-full').style.display = 'none'; document.getElementById('2502.12735v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12671">arXiv:2502.12671</a> <span> [<a href="https://arxiv.org/pdf/2502.12671">pdf</a>, <a href="https://arxiv.org/format/2502.12671">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Baichuan-M1: Pushing the Medical Capability of Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Wang%2C+B">Bingning Wang</a>, <a href="/search/?searchtype=author&query=Zhao%2C+H">Haizhou Zhao</a>, <a href="/search/?searchtype=author&query=Zhou%2C+H">Huozhi Zhou</a>, <a href="/search/?searchtype=author&query=Song%2C+L">Liang Song</a>, <a href="/search/?searchtype=author&query=Xu%2C+M">Mingyu Xu</a>, <a href="/search/?searchtype=author&query=Cheng%2C+W">Wei Cheng</a>, <a href="/search/?searchtype=author&query=Zeng%2C+X">Xiangrong Zeng</a>, <a href="/search/?searchtype=author&query=Zhang%2C+Y">Yupeng Zhang</a>, <a href="/search/?searchtype=author&query=Huo%2C+Y">Yuqi Huo</a>, <a href="/search/?searchtype=author&query=Wang%2C+Z">Zecheng Wang</a>, <a href="/search/?searchtype=author&query=Zhao%2C+Z">Zhengyun Zhao</a>, <a href="/search/?searchtype=author&query=Pan%2C+D">Da Pan</a>, <a href="/search/?searchtype=author&query=Yang%2C+F">Fan Yang</a>, <a href="/search/?searchtype=author&query=Kou%2C+F">Fei Kou</a>, <a href="/search/?searchtype=author&query=Li%2C+F">Fei Li</a>, <a href="/search/?searchtype=author&query=Chen%2C+F">Fuzhong Chen</a>, <a href="/search/?searchtype=author&query=Dong%2C+G">Guosheng Dong</a>, <a href="/search/?searchtype=author&query=Liu%2C+H">Han Liu</a>, <a href="/search/?searchtype=author&query=Zhang%2C+H">Hongda Zhang</a>, <a href="/search/?searchtype=author&query=He%2C+J">Jin He</a>, <a href="/search/?searchtype=author&query=Yang%2C+J">Jinjie Yang</a>, <a href="/search/?searchtype=author&query=Wu%2C+K">Kangxi Wu</a>, <a href="/search/?searchtype=author&query=Wu%2C+K">Kegeng Wu</a>, <a href="/search/?searchtype=author&query=Su%2C+L">Lei Su</a>, <a href="/search/?searchtype=author&query=Niu%2C+L">Linlin Niu</a> , et al. (18 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.12671v1-abstract-short" style="display: inline;"> The current generation of large language models (LLMs) is typically designed for broad, general-purpose applications, while domain-specific LLMs, especially in vertical fields like medicine, remain relatively scarce. In particular, the development of highly efficient and practical LLMs for the medical domain is challenging due to the complexity of medical knowledge and the limited availability of… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12671v1-abstract-full').style.display = 'inline'; document.getElementById('2502.12671v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.12671v1-abstract-full" style="display: none;"> The current generation of large language models (LLMs) is typically designed for broad, general-purpose applications, while domain-specific LLMs, especially in vertical fields like medicine, remain relatively scarce. In particular, the development of highly efficient and practical LLMs for the medical domain is challenging due to the complexity of medical knowledge and the limited availability of high-quality data. To bridge this gap, we introduce Baichuan-M1, a series of large language models specifically optimized for medical applications. Unlike traditional approaches that simply continue pretraining on existing models or apply post-training to a general base model, Baichuan-M1 is trained from scratch with a dedicated focus on enhancing medical capabilities. Our model is trained on 20 trillion tokens and incorporates a range of effective training methods that strike a balance between general capabilities and medical expertise. As a result, Baichuan-M1 not only performs strongly across general domains such as mathematics and coding but also excels in specialized medical fields. We have open-sourced Baichuan-M1-14B, a mini version of our model, which can be accessed through the following links. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12671v1-abstract-full').style.display = 'none'; document.getElementById('2502.12671v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">33 pages, technical report</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12454">arXiv:2502.12454</a> <span> [<a href="https://arxiv.org/pdf/2502.12454">pdf</a>, <a href="https://arxiv.org/format/2502.12454">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Benchmarking Zero-Shot Facial Emotion Annotation with Large Language Models: A Multi-Class and Multi-Frame Approach in DailyLife </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Zhang%2C+H">He Zhang</a>, <a href="/search/?searchtype=author&query=Fu%2C+X">Xinyi Fu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.12454v1-abstract-short" style="display: inline;"> This study investigates the feasibility and performance of using large language models (LLMs) to automatically annotate human emotions in everyday scenarios. We conducted experiments on the DailyLife subset of the publicly available FERV39k dataset, employing the GPT-4o-mini model for rapid, zero-shot labeling of key frames extracted from video segments. Under a seven-class emotion taxonomy ("Angr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12454v1-abstract-full').style.display = 'inline'; document.getElementById('2502.12454v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.12454v1-abstract-full" style="display: none;"> This study investigates the feasibility and performance of using large language models (LLMs) to automatically annotate human emotions in everyday scenarios. We conducted experiments on the DailyLife subset of the publicly available FERV39k dataset, employing the GPT-4o-mini model for rapid, zero-shot labeling of key frames extracted from video segments. Under a seven-class emotion taxonomy ("Angry," "Disgust," "Fear," "Happy," "Neutral," "Sad," "Surprise"), the LLM achieved an average precision of approximately 50%. In contrast, when limited to ternary emotion classification (negative/neutral/positive), the average precision increased to approximately 64%. Additionally, we explored a strategy that integrates multiple frames within 1-2 second video clips to enhance labeling performance and reduce costs. The results indicate that this approach can slightly improve annotation accuracy. Overall, our preliminary findings highlight the potential application of zero-shot LLMs in human facial emotion annotation tasks, offering new avenues for reducing labeling costs and broadening the applicability of LLMs in complex multimodal environments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12454v1-abstract-full').style.display = 'none'; document.getElementById('2502.12454v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12421">arXiv:2502.12421</a> <span> [<a href="https://arxiv.org/pdf/2502.12421">pdf</a>, <a href="https://arxiv.org/format/2502.12421">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Wi-Chat: Large Language Model Powered Wi-Fi Sensing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Zhang%2C+H">Haopeng Zhang</a>, <a href="/search/?searchtype=author&query=Ren%2C+Y">Yili Ren</a>, <a href="/search/?searchtype=author&query=Yuan%2C+H">Haohan Yuan</a>, <a href="/search/?searchtype=author&query=Zhang%2C+J">Jingzhe Zhang</a>, <a href="/search/?searchtype=author&query=Shen%2C+Y">Yitong Shen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.12421v1-abstract-short" style="display: inline;"> Recent advancements in Large Language Models (LLMs) have demonstrated remarkable capabilities across diverse tasks. However, their potential to integrate physical model knowledge for real-world signal interpretation remains largely unexplored. In this work, we introduce Wi-Chat, the first LLM-powered Wi-Fi-based human activity recognition system. We demonstrate that LLMs can process raw Wi-Fi sign… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12421v1-abstract-full').style.display = 'inline'; document.getElementById('2502.12421v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.12421v1-abstract-full" style="display: none;"> Recent advancements in Large Language Models (LLMs) have demonstrated remarkable capabilities across diverse tasks. However, their potential to integrate physical model knowledge for real-world signal interpretation remains largely unexplored. In this work, we introduce Wi-Chat, the first LLM-powered Wi-Fi-based human activity recognition system. We demonstrate that LLMs can process raw Wi-Fi signals and infer human activities by incorporating Wi-Fi sensing principles into prompts. Our approach leverages physical model insights to guide LLMs in interpreting Channel State Information (CSI) data without traditional signal processing techniques. Through experiments on real-world Wi-Fi datasets, we show that LLMs exhibit strong reasoning capabilities, achieving zero-shot activity recognition. These findings highlight a new paradigm for Wi-Fi sensing, expanding LLM applications beyond conventional language tasks and enhancing the accessibility of wireless sensing for real-world deployments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12421v1-abstract-full').style.display = 'none'; document.getElementById('2502.12421v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12409">arXiv:2502.12409</a> <span> [<a href="https://arxiv.org/pdf/2502.12409">pdf</a>, <a href="https://arxiv.org/format/2502.12409">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Astrophysics of Galaxies">astro-ph.GA</span> </div> </div> <p class="title is-5 mathjax"> Dominant Role of Coplanar Inflows in Driving Disk Evolution Revealed by Gas-Phase Metallicity Gradients </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Lyu%2C+C">Cheqiu Lyu</a>, <a href="/search/?searchtype=author&query=Wang%2C+E">Enci Wang</a>, <a href="/search/?searchtype=author&query=Zhang%2C+H">Hongxin Zhang</a>, <a href="/search/?searchtype=author&query=Peng%2C+Y">Yingjie Peng</a>, <a href="/search/?searchtype=author&query=Wang%2C+X">Xin Wang</a>, <a href="/search/?searchtype=author&query=Li%2C+H">Haixin Li</a>, <a href="/search/?searchtype=author&query=Ma%2C+C">Chengyu Ma</a>, <a href="/search/?searchtype=author&query=Yu%2C+H">Haoran Yu</a>, <a href="/search/?searchtype=author&query=Chen%2C+Z">Zeyu Chen</a>, <a href="/search/?searchtype=author&query=Jia%2C+C">Cheng Jia</a>, <a href="/search/?searchtype=author&query=Kong%2C+X">Xu Kong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.12409v1-abstract-short" style="display: inline;"> Using spatially resolved spectroscopic data from the MaNGA sample, we investigate the parameters influencing the radial gradients of gas-phase metallicity ($\nabla\log(\mathrm{O/H})$), to determine whether disk formation is primarily driven by coplanar gas inflow or by the independent evolution of distinct regions within the disk. Our results show that $\nabla \log(\mathrm{O/H})$ strongly correlat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12409v1-abstract-full').style.display = 'inline'; document.getElementById('2502.12409v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.12409v1-abstract-full" style="display: none;"> Using spatially resolved spectroscopic data from the MaNGA sample, we investigate the parameters influencing the radial gradients of gas-phase metallicity ($\nabla\log(\mathrm{O/H})$), to determine whether disk formation is primarily driven by coplanar gas inflow or by the independent evolution of distinct regions within the disk. Our results show that $\nabla \log(\mathrm{O/H})$ strongly correlates with local gas-phase metallicity at a given stellar mass, with steeper gradients observed in metal-poorer disks. This trend supports the coplanar gas inflow scenario, wherein the gas is progressively enriched by in situ star formation as it flows inward. In contrast, the radial gradient of stellar mass surface density shows very weak correlations with $\nabla \log(\mathrm{O/H})$, which is inconsistent with the independent evolution mode, where gas inflow, star formation, and metal enrichment occur independently within each annulus of the disk. Furthermore, we find that $\nabla \log(\mathrm{O/H})$ is also closely correlated with an indicator of local gas turbulence $蟽_{\mathrm{gas}}/R_{\mathrm{e}}$, highlighting the competing roles of turbulence and coplanar inflow in shaping metallicity gradients. Our results provide indirect observational evidence supporting coplanar gas inflow as the driving mechanism for disk evolution. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12409v1-abstract-full').style.display = 'none'; document.getElementById('2502.12409v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages, 5+4 figures. Accepted by ApJL</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12330">arXiv:2502.12330</a> <span> [<a href="https://arxiv.org/pdf/2502.12330">pdf</a>, <a href="https://arxiv.org/format/2502.12330">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> X-IL: Exploring the Design Space of Imitation Learning Policies </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Jia%2C+X">Xiaogang Jia</a>, <a href="/search/?searchtype=author&query=Donat%2C+A">Atalay Donat</a>, <a href="/search/?searchtype=author&query=Huang%2C+X">Xi Huang</a>, <a href="/search/?searchtype=author&query=Zhao%2C+X">Xuan Zhao</a>, <a href="/search/?searchtype=author&query=Blessing%2C+D">Denis Blessing</a>, <a href="/search/?searchtype=author&query=Zhou%2C+H">Hongyi Zhou</a>, <a href="/search/?searchtype=author&query=Wang%2C+H+A">Han A. Wang</a>, <a href="/search/?searchtype=author&query=Zhang%2C+H">Hanyi Zhang</a>, <a href="/search/?searchtype=author&query=Wang%2C+Q">Qian Wang</a>, <a href="/search/?searchtype=author&query=Lioutikov%2C+R">Rudolf Lioutikov</a>, <a href="/search/?searchtype=author&query=Neumann%2C+G">Gerhard Neumann</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.12330v2-abstract-short" style="display: inline;"> Designing modern imitation learning (IL) policies requires making numerous decisions, including the selection of feature encoding, architecture, policy representation, and more. As the field rapidly advances, the range of available options continues to grow, creating a vast and largely unexplored design space for IL policies. In this work, we present X-IL, an accessible open-source framework desig… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12330v2-abstract-full').style.display = 'inline'; document.getElementById('2502.12330v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.12330v2-abstract-full" style="display: none;"> Designing modern imitation learning (IL) policies requires making numerous decisions, including the selection of feature encoding, architecture, policy representation, and more. As the field rapidly advances, the range of available options continues to grow, creating a vast and largely unexplored design space for IL policies. In this work, we present X-IL, an accessible open-source framework designed to systematically explore this design space. The framework's modular design enables seamless swapping of policy components, such as backbones (e.g., Transformer, Mamba, xLSTM) and policy optimization techniques (e.g., Score-matching, Flow-matching). This flexibility facilitates comprehensive experimentation and has led to the discovery of novel policy configurations that outperform existing methods on recent robot learning benchmarks. Our experiments demonstrate not only significant performance gains but also provide valuable insights into the strengths and weaknesses of various design choices. This study serves as both a practical reference for practitioners and a foundation for guiding future research in imitation learning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12330v2-abstract-full').style.display = 'none'; document.getElementById('2502.12330v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12320">arXiv:2502.12320</a> <span> [<a href="https://arxiv.org/pdf/2502.12320">pdf</a>, <a href="https://arxiv.org/format/2502.12320">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Towards Fusing Point Cloud and Visual Representations for Imitation Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Donat%2C+A">Atalay Donat</a>, <a href="/search/?searchtype=author&query=Jia%2C+X">Xiaogang Jia</a>, <a href="/search/?searchtype=author&query=Huang%2C+X">Xi Huang</a>, <a href="/search/?searchtype=author&query=Taranovic%2C+A">Aleksandar Taranovic</a>, <a href="/search/?searchtype=author&query=Blessing%2C+D">Denis Blessing</a>, <a href="/search/?searchtype=author&query=Li%2C+G">Ge Li</a>, <a href="/search/?searchtype=author&query=Zhou%2C+H">Hongyi Zhou</a>, <a href="/search/?searchtype=author&query=Zhang%2C+H">Hanyi Zhang</a>, <a href="/search/?searchtype=author&query=Lioutikov%2C+R">Rudolf Lioutikov</a>, <a href="/search/?searchtype=author&query=Neumann%2C+G">Gerhard Neumann</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.12320v2-abstract-short" style="display: inline;"> Learning for manipulation requires using policies that have access to rich sensory information such as point clouds or RGB images. Point clouds efficiently capture geometric structures, making them essential for manipulation tasks in imitation learning. In contrast, RGB images provide rich texture and semantic information that can be crucial for certain tasks. Existing approaches for fusing both m… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12320v2-abstract-full').style.display = 'inline'; document.getElementById('2502.12320v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.12320v2-abstract-full" style="display: none;"> Learning for manipulation requires using policies that have access to rich sensory information such as point clouds or RGB images. Point clouds efficiently capture geometric structures, making them essential for manipulation tasks in imitation learning. In contrast, RGB images provide rich texture and semantic information that can be crucial for certain tasks. Existing approaches for fusing both modalities assign 2D image features to point clouds. However, such approaches often lose global contextual information from the original images. In this work, we propose FPV-Net, a novel imitation learning method that effectively combines the strengths of both point cloud and RGB modalities. Our method conditions the point-cloud encoder on global and local image tokens using adaptive layer norm conditioning, leveraging the beneficial properties of both modalities. Through extensive experiments on the challenging RoboCasa benchmark, we demonstrate the limitations of relying on either modality alone and show that our method achieves state-of-the-art performance across all tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12320v2-abstract-full').style.display = 'none'; document.getElementById('2502.12320v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12202">arXiv:2502.12202</a> <span> [<a href="https://arxiv.org/pdf/2502.12202">pdf</a>, <a href="https://arxiv.org/format/2502.12202">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> BoT: Breaking Long Thought Processes of o1-like Large Language Models through Backdoor Attack </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Zhu%2C+Z">Zihao Zhu</a>, <a href="/search/?searchtype=author&query=Zhang%2C+H">Hongbao Zhang</a>, <a href="/search/?searchtype=author&query=Zhang%2C+M">Mingda Zhang</a>, <a href="/search/?searchtype=author&query=Wang%2C+R">Ruotong Wang</a>, <a href="/search/?searchtype=author&query=Wu%2C+G">Guanzong Wu</a>, <a href="/search/?searchtype=author&query=Xu%2C+K">Ke Xu</a>, <a href="/search/?searchtype=author&query=Wu%2C+B">Baoyuan Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.12202v1-abstract-short" style="display: inline;"> Longer thought, better performance: large language models with deep reasoning capabilities, particularly o1-like models, have demonstrated remarkable performance by generating extensive thought processes during inference. This trade-off reveals a potential vulnerability: adversaries could compromise model performance by forcing immediate responses without thought processes. To this end, in this pa… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12202v1-abstract-full').style.display = 'inline'; document.getElementById('2502.12202v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.12202v1-abstract-full" style="display: none;"> Longer thought, better performance: large language models with deep reasoning capabilities, particularly o1-like models, have demonstrated remarkable performance by generating extensive thought processes during inference. This trade-off reveals a potential vulnerability: adversaries could compromise model performance by forcing immediate responses without thought processes. To this end, in this paper, we introduce a novel attack scenario targeting the long thought processes of o1-like models and propose BoT (Break CoT), which can selectively break intrinsic reasoning mechanisms through backdoor attacks. BoT constructs poisoned datasets with designed triggers and injects backdoor by either supervised fine-tuning or direct preference optimization. When triggered, the model directly generates answers without thought processes, while maintaining normal reasoning capabilities for clean inputs. Extensive experiments on open-source o1-like models, including recent DeepSeek-R1, demonstrate that BoT nearly achieves high attack success rates while maintaining clean accuracy, highlighting the critical safety risk in current models. Furthermore, the relationship between task difficulty and helpfulness reveals a potential application for good, enabling users to customize model behavior based on task complexity. Code is available at \href{https://github.com/zihao-ai/BoT}{https://github.com/zihao-ai/BoT}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12202v1-abstract-full').style.display = 'none'; document.getElementById('2502.12202v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12098">arXiv:2502.12098</a> <span> [<a href="https://arxiv.org/pdf/2502.12098">pdf</a>, <a href="https://arxiv.org/format/2502.12098">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Bandwidth-Adaptive Spatiotemporal Correspondence Identification for Collaborative Perception </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Gao%2C+P">Peng Gao</a>, <a href="/search/?searchtype=author&query=Jose%2C+W+J">Williard Joshua Jose</a>, <a href="/search/?searchtype=author&query=Zhang%2C+H">Hao Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.12098v1-abstract-short" style="display: inline;"> Correspondence identification (CoID) is an essential capability in multi-robot collaborative perception, which enables a group of robots to consistently refer to the same objects within their respective fields of view. In real-world applications, such as connected autonomous driving, vehicles face challenges in directly sharing raw observations due to limited communication bandwidth. In order to a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12098v1-abstract-full').style.display = 'inline'; document.getElementById('2502.12098v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.12098v1-abstract-full" style="display: none;"> Correspondence identification (CoID) is an essential capability in multi-robot collaborative perception, which enables a group of robots to consistently refer to the same objects within their respective fields of view. In real-world applications, such as connected autonomous driving, vehicles face challenges in directly sharing raw observations due to limited communication bandwidth. In order to address this challenge, we propose a novel approach for bandwidth-adaptive spatiotemporal CoID in collaborative perception. This approach allows robots to progressively select partial spatiotemporal observations and share with others, while adapting to communication constraints that dynamically change over time. We evaluate our approach across various scenarios in connected autonomous driving simulations. Experimental results validate that our approach enables CoID and adapts to dynamic communication bandwidth changes. In addition, our approach achieves 8%-56% overall improvements in terms of covisible object retrieval for CoID and data sharing efficiency, which outperforms previous techniques and achieves the state-of-the-art performance. More information is available at: https://gaopeng5.github.io/acoid. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12098v1-abstract-full').style.display = 'none'; document.getElementById('2502.12098v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11946">arXiv:2502.11946</a> <span> [<a href="https://arxiv.org/pdf/2502.11946">pdf</a>, <a href="https://arxiv.org/format/2502.11946">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Step-Audio: Unified Understanding and Generation in Intelligent Speech Interaction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Huang%2C+A">Ailin Huang</a>, <a href="/search/?searchtype=author&query=Wu%2C+B">Boyong Wu</a>, <a href="/search/?searchtype=author&query=Wang%2C+B">Bruce Wang</a>, <a href="/search/?searchtype=author&query=Yan%2C+C">Chao Yan</a>, <a href="/search/?searchtype=author&query=Hu%2C+C">Chen Hu</a>, <a href="/search/?searchtype=author&query=Feng%2C+C">Chengli Feng</a>, <a href="/search/?searchtype=author&query=Tian%2C+F">Fei Tian</a>, <a href="/search/?searchtype=author&query=Shen%2C+F">Feiyu Shen</a>, <a href="/search/?searchtype=author&query=Li%2C+J">Jingbei Li</a>, <a href="/search/?searchtype=author&query=Chen%2C+M">Mingrui Chen</a>, <a href="/search/?searchtype=author&query=Liu%2C+P">Peng Liu</a>, <a href="/search/?searchtype=author&query=Miao%2C+R">Ruihang Miao</a>, <a href="/search/?searchtype=author&query=You%2C+W">Wang You</a>, <a href="/search/?searchtype=author&query=Chen%2C+X">Xi Chen</a>, <a href="/search/?searchtype=author&query=Yang%2C+X">Xuerui Yang</a>, <a href="/search/?searchtype=author&query=Huang%2C+Y">Yechang Huang</a>, <a href="/search/?searchtype=author&query=Zhang%2C+Y">Yuxiang Zhang</a>, <a href="/search/?searchtype=author&query=Gong%2C+Z">Zheng Gong</a>, <a href="/search/?searchtype=author&query=Zhang%2C+Z">Zixin Zhang</a>, <a href="/search/?searchtype=author&query=Zhou%2C+H">Hongyu Zhou</a>, <a href="/search/?searchtype=author&query=Sun%2C+J">Jianjian Sun</a>, <a href="/search/?searchtype=author&query=Li%2C+B">Brian Li</a>, <a href="/search/?searchtype=author&query=Feng%2C+C">Chengting Feng</a>, <a href="/search/?searchtype=author&query=Wan%2C+C">Changyi Wan</a>, <a href="/search/?searchtype=author&query=Hu%2C+H">Hanpeng Hu</a> , et al. (120 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.11946v2-abstract-short" style="display: inline;"> Real-time speech interaction, serving as a fundamental interface for human-machine collaboration, holds immense potential. However, current open-source models face limitations such as high costs in voice data collection, weakness in dynamic control, and limited intelligence. To address these challenges, this paper introduces Step-Audio, the first production-ready open-source solution. Key contribu… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11946v2-abstract-full').style.display = 'inline'; document.getElementById('2502.11946v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11946v2-abstract-full" style="display: none;"> Real-time speech interaction, serving as a fundamental interface for human-machine collaboration, holds immense potential. However, current open-source models face limitations such as high costs in voice data collection, weakness in dynamic control, and limited intelligence. To address these challenges, this paper introduces Step-Audio, the first production-ready open-source solution. Key contributions include: 1) a 130B-parameter unified speech-text multi-modal model that achieves unified understanding and generation, with the Step-Audio-Chat version open-sourced; 2) a generative speech data engine that establishes an affordable voice cloning framework and produces the open-sourced lightweight Step-Audio-TTS-3B model through distillation; 3) an instruction-driven fine control system enabling dynamic adjustments across dialects, emotions, singing, and RAP; 4) an enhanced cognitive architecture augmented with tool calling and role-playing abilities to manage complex tasks effectively. Based on our new StepEval-Audio-360 evaluation benchmark, Step-Audio achieves state-of-the-art performance in human evaluations, especially in terms of instruction following. On open-source benchmarks like LLaMA Question, shows 9.3% average performance improvement, demonstrating our commitment to advancing the development of open-source multi-modal language technologies. Our code and models are available at https://github.com/stepfun-ai/Step-Audio. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11946v2-abstract-full').style.display = 'none'; document.getElementById('2502.11946v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11916">arXiv:2502.11916</a> <span> [<a href="https://arxiv.org/pdf/2502.11916">pdf</a>, <a href="https://arxiv.org/format/2502.11916">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> EssayJudge: A Multi-Granular Benchmark for Assessing Automated Essay Scoring Capabilities of Multimodal Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Su%2C+J">Jiamin Su</a>, <a href="/search/?searchtype=author&query=Yan%2C+Y">Yibo Yan</a>, <a href="/search/?searchtype=author&query=Fu%2C+F">Fangteng Fu</a>, <a href="/search/?searchtype=author&query=Zhang%2C+H">Han Zhang</a>, <a href="/search/?searchtype=author&query=Ye%2C+J">Jingheng Ye</a>, <a href="/search/?searchtype=author&query=Liu%2C+X">Xiang Liu</a>, <a href="/search/?searchtype=author&query=Huo%2C+J">Jiahao Huo</a>, <a href="/search/?searchtype=author&query=Zhou%2C+H">Huiyu Zhou</a>, <a href="/search/?searchtype=author&query=Hu%2C+X">Xuming Hu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.11916v1-abstract-short" style="display: inline;"> Automated Essay Scoring (AES) plays a crucial role in educational assessment by providing scalable and consistent evaluations of writing tasks. However, traditional AES systems face three major challenges: (1) reliance on handcrafted features that limit generalizability, (2) difficulty in capturing fine-grained traits like coherence and argumentation, and (3) inability to handle multimodal context… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11916v1-abstract-full').style.display = 'inline'; document.getElementById('2502.11916v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11916v1-abstract-full" style="display: none;"> Automated Essay Scoring (AES) plays a crucial role in educational assessment by providing scalable and consistent evaluations of writing tasks. However, traditional AES systems face three major challenges: (1) reliance on handcrafted features that limit generalizability, (2) difficulty in capturing fine-grained traits like coherence and argumentation, and (3) inability to handle multimodal contexts. In the era of Multimodal Large Language Models (MLLMs), we propose EssayJudge, the first multimodal benchmark to evaluate AES capabilities across lexical-, sentence-, and discourse-level traits. By leveraging MLLMs' strengths in trait-specific scoring and multimodal context understanding, EssayJudge aims to offer precise, context-rich evaluations without manual feature engineering, addressing longstanding AES limitations. Our experiments with 18 representative MLLMs reveal gaps in AES performance compared to human evaluation, particularly in discourse-level traits, highlighting the need for further advancements in MLLM-based AES research. Our dataset and code will be available upon acceptance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11916v1-abstract-full').style.display = 'none'; document.getElementById('2502.11916v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">JS and YY are co-first authors. XH is the corresponding author</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11897">arXiv:2502.11897</a> <span> [<a href="https://arxiv.org/pdf/2502.11897">pdf</a>, <a href="https://arxiv.org/format/2502.11897">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> DLFR-VAE: Dynamic Latent Frame Rate VAE for Video Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Yuan%2C+Z">Zhihang Yuan</a>, <a href="/search/?searchtype=author&query=Wang%2C+S">Siyuan Wang</a>, <a href="/search/?searchtype=author&query=Xie%2C+R">Rui Xie</a>, <a href="/search/?searchtype=author&query=Zhang%2C+H">Hanling Zhang</a>, <a href="/search/?searchtype=author&query=Fang%2C+T">Tongcheng Fang</a>, <a href="/search/?searchtype=author&query=Shang%2C+Y">Yuzhang Shang</a>, <a href="/search/?searchtype=author&query=Yan%2C+S">Shengen Yan</a>, <a href="/search/?searchtype=author&query=Dai%2C+G">Guohao Dai</a>, <a href="/search/?searchtype=author&query=Wang%2C+Y">Yu Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.11897v1-abstract-short" style="display: inline;"> In this paper, we propose the Dynamic Latent Frame Rate VAE (DLFR-VAE), a training-free paradigm that can make use of adaptive temporal compression in latent space. While existing video generative models apply fixed compression rates via pretrained VAE, we observe that real-world video content exhibits substantial temporal non-uniformity, with high-motion segments containing more information than… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11897v1-abstract-full').style.display = 'inline'; document.getElementById('2502.11897v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11897v1-abstract-full" style="display: none;"> In this paper, we propose the Dynamic Latent Frame Rate VAE (DLFR-VAE), a training-free paradigm that can make use of adaptive temporal compression in latent space. While existing video generative models apply fixed compression rates via pretrained VAE, we observe that real-world video content exhibits substantial temporal non-uniformity, with high-motion segments containing more information than static scenes. Based on this insight, DLFR-VAE dynamically adjusts the latent frame rate according to the content complexity. Specifically, DLFR-VAE comprises two core innovations: (1) A Dynamic Latent Frame Rate Scheduler that partitions videos into temporal chunks and adaptively determines optimal frame rates based on information-theoretic content complexity, and (2) A training-free adaptation mechanism that transforms pretrained VAE architectures into a dynamic VAE that can process features with variable frame rates. Our simple but effective DLFR-VAE can function as a plug-and-play module, seamlessly integrating with existing video generation models and accelerating the video generation process. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11897v1-abstract-full').style.display = 'none'; document.getElementById('2502.11897v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11811">arXiv:2502.11811</a> <span> [<a href="https://arxiv.org/pdf/2502.11811">pdf</a>, <a href="https://arxiv.org/format/2502.11811">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> FineFilter: A Fine-grained Noise Filtering Mechanism for Retrieval-Augmented Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Zhang%2C+Q">Qianchi Zhang</a>, <a href="/search/?searchtype=author&query=Zhang%2C+H">Hainan Zhang</a>, <a href="/search/?searchtype=author&query=Pang%2C+L">Liang Pang</a>, <a href="/search/?searchtype=author&query=Zheng%2C+H">Hongwei Zheng</a>, <a href="/search/?searchtype=author&query=Tong%2C+Y">Yongxin Tong</a>, <a href="/search/?searchtype=author&query=Zheng%2C+Z">Zhiming Zheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.11811v2-abstract-short" style="display: inline;"> Retrieved documents containing noise will hinder Retrieval-Augmented Generation (RAG) from detecting answer clues, necessitating noise filtering mechanisms to enhance accuracy. Existing methods use re-ranking or summarization to identify the most relevant sentences, but directly and accurately locating answer clues from these large-scale and complex documents remains challenging. Unlike these docu… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11811v2-abstract-full').style.display = 'inline'; document.getElementById('2502.11811v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11811v2-abstract-full" style="display: none;"> Retrieved documents containing noise will hinder Retrieval-Augmented Generation (RAG) from detecting answer clues, necessitating noise filtering mechanisms to enhance accuracy. Existing methods use re-ranking or summarization to identify the most relevant sentences, but directly and accurately locating answer clues from these large-scale and complex documents remains challenging. Unlike these document-level operations, we treat noise filtering as a sentence-level MinMax optimization problem: first identifying the potential clues from multiple documents using contextual information, then ranking them by relevance, and finally retaining the least clues through truncation. In this paper, we propose FineFilter, a novel fine-grained noise filtering mechanism for RAG consisting of a clue extractor, a re-ranker, and a truncator. We optimize each module to tackle complex reasoning challenges: (1) Clue extractor firstly uses sentences containing the answer and similar ones as fine-tuned targets, aiming at extracting sufficient potential clues; (2) Re-ranker is trained to prioritize effective clues based on the real feedback from generation module, with clues capable of generating correct answer as positive samples and others as negative; (3) Truncator takes the minimum clues needed to answer the question (truncation point) as fine-tuned targets, and performs truncation on the re-ranked clues to achieve fine-grained noise filtering. Experiments on three QA datasets demonstrate that FineFilter significantly outperforms baselines in terms of performance and inference cost. Further analysis on each module shows the effectiveness of our optimizations for complex reasoning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11811v2-abstract-full').style.display = 'none'; document.getElementById('2502.11811v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11806">arXiv:2502.11806</a> <span> [<a href="https://arxiv.org/pdf/2502.11806">pdf</a>, <a href="https://arxiv.org/format/2502.11806">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Exploring Translation Mechanism of Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Zhang%2C+H">Hongbin Zhang</a>, <a href="/search/?searchtype=author&query=Chen%2C+K">Kehai Chen</a>, <a href="/search/?searchtype=author&query=Bai%2C+X">Xuefeng Bai</a>, <a href="/search/?searchtype=author&query=Li%2C+X">Xiucheng Li</a>, <a href="/search/?searchtype=author&query=Zhang%2C+M">Min Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.11806v1-abstract-short" style="display: inline;"> Large language models (LLMs) have succeeded remarkably in multilingual translation tasks. However, the inherent translation mechanisms of LLMs remain poorly understood, largely due to sophisticated architectures and vast parameter scales. In response to this issue, this study explores the translation mechanism of LLM from the perspective of computational components (e.g., attention heads and MLPs)… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11806v1-abstract-full').style.display = 'inline'; document.getElementById('2502.11806v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11806v1-abstract-full" style="display: none;"> Large language models (LLMs) have succeeded remarkably in multilingual translation tasks. However, the inherent translation mechanisms of LLMs remain poorly understood, largely due to sophisticated architectures and vast parameter scales. In response to this issue, this study explores the translation mechanism of LLM from the perspective of computational components (e.g., attention heads and MLPs). Path patching is utilized to explore causal relationships between components, detecting those crucial for translation tasks and subsequently analyzing their behavioral patterns in human-interpretable terms. Comprehensive analysis reveals that translation is predominantly facilitated by a sparse subset of specialized attention heads (less than 5\%), which extract source language, indicator, and positional features. MLPs subsequently integrate and process these features by transiting towards English-centric latent representations. Notably, building on the above findings, targeted fine-tuning of only 64 heads achieves translation improvement comparable to full-parameter tuning while preserving general capabilities. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11806v1-abstract-full').style.display = 'none'; document.getElementById('2502.11806v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11744">arXiv:2502.11744</a> <span> [<a href="https://arxiv.org/pdf/2502.11744">pdf</a>, <a href="https://arxiv.org/format/2502.11744">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> FUNCTO: Function-Centric One-Shot Imitation Learning for Tool Manipulation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Tang%2C+C">Chao Tang</a>, <a href="/search/?searchtype=author&query=Xiao%2C+A">Anxing Xiao</a>, <a href="/search/?searchtype=author&query=Deng%2C+Y">Yuhong Deng</a>, <a href="/search/?searchtype=author&query=Hu%2C+T">Tianrun Hu</a>, <a href="/search/?searchtype=author&query=Dong%2C+W">Wenlong Dong</a>, <a href="/search/?searchtype=author&query=Zhang%2C+H">Hanbo Zhang</a>, <a href="/search/?searchtype=author&query=Hsu%2C+D">David Hsu</a>, <a href="/search/?searchtype=author&query=Zhang%2C+H">Hong Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.11744v1-abstract-short" style="display: inline;"> Learning tool use from a single human demonstration video offers a highly intuitive and efficient approach to robot teaching. While humans can effortlessly generalize a demonstrated tool manipulation skill to diverse tools that support the same function (e.g., pouring with a mug versus a teapot), current one-shot imitation learning (OSIL) methods struggle to achieve this. A key challenge lies in e… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11744v1-abstract-full').style.display = 'inline'; document.getElementById('2502.11744v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11744v1-abstract-full" style="display: none;"> Learning tool use from a single human demonstration video offers a highly intuitive and efficient approach to robot teaching. While humans can effortlessly generalize a demonstrated tool manipulation skill to diverse tools that support the same function (e.g., pouring with a mug versus a teapot), current one-shot imitation learning (OSIL) methods struggle to achieve this. A key challenge lies in establishing functional correspondences between demonstration and test tools, considering significant geometric variations among tools with the same function (i.e., intra-function variations). To address this challenge, we propose FUNCTO (Function-Centric OSIL for Tool Manipulation), an OSIL method that establishes function-centric correspondences with a 3D functional keypoint representation, enabling robots to generalize tool manipulation skills from a single human demonstration video to novel tools with the same function despite significant intra-function variations. With this formulation, we factorize FUNCTO into three stages: (1) functional keypoint extraction, (2) function-centric correspondence establishment, and (3) functional keypoint-based action planning. We evaluate FUNCTO against exiting modular OSIL methods and end-to-end behavioral cloning methods through real-robot experiments on diverse tool manipulation tasks. The results demonstrate the superiority of FUNCTO when generalizing to novel tools with intra-function geometric variations. More details are available at https://sites.google.com/view/functo. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11744v1-abstract-full').style.display = 'none'; document.getElementById('2502.11744v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11391">arXiv:2502.11391</a> <span> [<a href="https://arxiv.org/pdf/2502.11391">pdf</a>, <a href="https://arxiv.org/ps/2502.11391">ps</a>, <a href="https://arxiv.org/format/2502.11391">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Combinatorics">math.CO</span> </div> </div> <p class="title is-5 mathjax"> Excluded conformal minors of Birkhoff-von Neumann graphs with equal global forcing number and maximum anti-forcing number </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Zhang%2C+Y">Yaxian Zhang</a>, <a href="/search/?searchtype=author&query=Wu%2C+Y">Yan Wu</a>, <a href="/search/?searchtype=author&query=Zhang%2C+H">Heping Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.11391v1-abstract-short" style="display: inline;"> Global forcing number and maximum anti-forcing number of matchable graphs (graphs with a perfect matching) were proposed in completely different situations with applications in theoretical chemistry. Surprisingly for bipartite graphs and some nonbipartite graphs as solid bricks (or Birkhoff-von Neumann graphs) G, the global forcing number gf(G) is at least the maximum anti-forcing number Af(G). It… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11391v1-abstract-full').style.display = 'inline'; document.getElementById('2502.11391v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11391v1-abstract-full" style="display: none;"> Global forcing number and maximum anti-forcing number of matchable graphs (graphs with a perfect matching) were proposed in completely different situations with applications in theoretical chemistry. Surprisingly for bipartite graphs and some nonbipartite graphs as solid bricks (or Birkhoff-von Neumann graphs) G, the global forcing number gf(G) is at least the maximum anti-forcing number Af(G). It is natural to consider when gf(G) = Af(G) holds. For convenience, we call a matchable graph G strongly uniform if each conformal matchable subgraph G' always satisfies gf(G') = Af(G'). In this article, by applying the ear decomposition theorem and discussing the existence of a Hamilton cycle with positions of chords, we give "excluded conformal minors" and "structural" characterizations of matchable bipartite graphs and Birkhoff-von Neumann graphs that are strongly uniform respectively. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11391v1-abstract-full').style.display = 'none'; document.getElementById('2502.11391v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">27 pages, 17 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11201">arXiv:2502.11201</a> <span> [<a href="https://arxiv.org/pdf/2502.11201">pdf</a>, <a href="https://arxiv.org/format/2502.11201">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Databases">cs.DB</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Bridging the Gap: Enabling Natural Language Queries for NoSQL Databases through Text-to-NoSQL Translation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Lu%2C+J">Jinwei Lu</a>, <a href="/search/?searchtype=author&query=Song%2C+Y">Yuanfeng Song</a>, <a href="/search/?searchtype=author&query=Qin%2C+Z">Zhiqian Qin</a>, <a href="/search/?searchtype=author&query=Zhang%2C+H">Haodi Zhang</a>, <a href="/search/?searchtype=author&query=Zhang%2C+C">Chen Zhang</a>, <a href="/search/?searchtype=author&query=Wong%2C+R+C">Raymond Chi-Wing Wong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.11201v2-abstract-short" style="display: inline;"> NoSQL databases have become increasingly popular due to their outstanding performance in handling large-scale, unstructured, and semi-structured data, highlighting the need for user-friendly interfaces to bridge the gap between non-technical users and complex database queries. In this paper, we introduce the Text-to-NoSQL task, which aims to convert natural language queries into NoSQL queries, the… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11201v2-abstract-full').style.display = 'inline'; document.getElementById('2502.11201v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11201v2-abstract-full" style="display: none;"> NoSQL databases have become increasingly popular due to their outstanding performance in handling large-scale, unstructured, and semi-structured data, highlighting the need for user-friendly interfaces to bridge the gap between non-technical users and complex database queries. In this paper, we introduce the Text-to-NoSQL task, which aims to convert natural language queries into NoSQL queries, thereby lowering the technical barrier for non-expert users. To promote research in this area, we developed a novel automated dataset construction process and released a large-scale and open-source dataset for this task, named TEND (short for Text-to-NoSQL Dataset). Additionally, we designed a SLM (Small Language Model)-assisted and RAG (Retrieval-augmented Generation)-assisted multi-step framework called SMART, which is specifically designed for Text-to-NoSQL conversion. To ensure comprehensive evaluation of the models, we also introduced a detailed set of metrics that assess the model's performance from both the query itself and its execution results. Our experimental results demonstrate the effectiveness of our approach and establish a benchmark for future research in this emerging field. We believe that our contributions will pave the way for more accessible and intuitive interactions with NoSQL databases. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11201v2-abstract-full').style.display = 'none'; document.getElementById('2502.11201v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11110">arXiv:2502.11110</a> <span> [<a href="https://arxiv.org/pdf/2502.11110">pdf</a>, <a href="https://arxiv.org/format/2502.11110">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> Ramp Up NTT in Record Time using GPU-Accelerated Algorithms and LLM-based Code Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Cui%2C+Y">Yu Cui</a>, <a href="/search/?searchtype=author&query=Fu%2C+H">Hang Fu</a>, <a href="/search/?searchtype=author&query=Wang%2C+L">Licheng Wang</a>, <a href="/search/?searchtype=author&query=Zhang%2C+H">Haibin Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.11110v1-abstract-short" style="display: inline;"> Homomorphic encryption (HE) is a core building block in privacy-preserving machine learning (PPML), but HE is also widely known as its efficiency bottleneck. Therefore, many GPU-accelerated cryptographic schemes have been proposed to improve the performance of HE. However, these methods often require complex modifications tailored to specific algorithms and are tightly coupled with specific GPU an… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11110v1-abstract-full').style.display = 'inline'; document.getElementById('2502.11110v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11110v1-abstract-full" style="display: none;"> Homomorphic encryption (HE) is a core building block in privacy-preserving machine learning (PPML), but HE is also widely known as its efficiency bottleneck. Therefore, many GPU-accelerated cryptographic schemes have been proposed to improve the performance of HE. However, these methods often require complex modifications tailored to specific algorithms and are tightly coupled with specific GPU and operating systems. It is interesting to ask how to generally offer more practical GPU-accelerated cryptographic algorithm implementations. Given the powerful code generation capabilities of large language models (LLMs), we aim to explore their potential to automatically generate practical GPU-friendly algorithm code using CPU-friendly code. In this paper, we focus on number theoretic transform (NTT) -- the core mechanism of HE. We first develop and optimize a GPU-friendly NTT (GNTT) family that exploits PyTorch's fast matrix computation and precomputation, achieving an approximately 62x speedup -- a significant boost over existing ones. Then we explore GPU-friendly code generation using various LLMs, including DeepSeek-R1, OpenAI o1 and o3-mini. We discover many interesting findings throughout the process. For instance, somewhat surprisingly, our experiments demonstrate that DeepSeek-R1 significantly outperforms OpenAI o3-mini and o1, but still cannot beat our optimized protocol. The findings provide valuable insights for turbocharging PPML and enhancing code generation capabilities of LLMs. Codes are available at: https://github.com/LMPC-Lab/GenGPUCrypto. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11110v1-abstract-full').style.display = 'none'; document.getElementById('2502.11110v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11103">arXiv:2502.11103</a> <span> [<a href="https://arxiv.org/pdf/2502.11103">pdf</a>, <a href="https://arxiv.org/format/2502.11103">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="High Energy Astrophysical Phenomena">astro-ph.HE</span> </div> </div> <p class="title is-5 mathjax"> Repeating fast radio bursts from synchrotron maser radiation in localized plasma blobs: Application to FRB 20121102A </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Li%2C+X">Xiao Li</a>, <a href="/search/?searchtype=author&query=Lyu%2C+F">Fen Lyu</a>, <a href="/search/?searchtype=author&query=Zhang%2C+H+M">Hai Ming Zhang</a>, <a href="/search/?searchtype=author&query=Deng%2C+C">Can-Min Deng</a>, <a href="/search/?searchtype=author&query=Liang%2C+E">En-Wei Liang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.11103v1-abstract-short" style="display: inline;"> The radiation physics of repeating fast radio bursts (FRBs) remains enigmatic. Motivated by the observed narrow-banded emission spectrum and ambiguous fringe pattern of the spectral peak frequency ($谓_{\rm pk}$) distribution of some repeating FRBs, such as FRB 20121102A, we propose that the bursts from repeating FRBs arise from synchrotron maser radiation in localized blobs within weakly magnetize… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11103v1-abstract-full').style.display = 'inline'; document.getElementById('2502.11103v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11103v1-abstract-full" style="display: none;"> The radiation physics of repeating fast radio bursts (FRBs) remains enigmatic. Motivated by the observed narrow-banded emission spectrum and ambiguous fringe pattern of the spectral peak frequency ($谓_{\rm pk}$) distribution of some repeating FRBs, such as FRB 20121102A, we propose that the bursts from repeating FRBs arise from synchrotron maser radiation in localized blobs within weakly magnetized plasma that relativistically moves toward observers. Assuming the plasma moves toward the observers with a bulk Lorentz factor of $螕=100$ and the electron distribution in an individual blob is monoenergetic ($纬_{\rm e}\sim300$), our analysis shows that bright and narrow-banded radio bursts with peak flux density $\sim$ 1 ${\rm Jy}$ at peak frequency ($谓_{\rm pk}$) $\sim 3.85$ GHz can be produced by the synchrotron maser emission if the plasma blob has a magnetization factor of $蟽\sim10^{-5}$ and a frequency of $谓_{\rm P}\sim 4.5$ MHz. The spectrum of bursts with lower $谓_{\rm pk}$ tends to be narrower. Applying our model to the bursts of FRB 20121102A, the distributions of both the observed $谓_{\rm pk}$ and isotropic energy $E_{\rm iso}$ detected by the Arecibo telescope at the L band and the Green Bank Telescope at the C band are successfully reproduced. We find that the $谓_{\rm P}$ distribution exhibits several peaks, similar to those observed in the $谓_{\rm pk}$ distribution of FRB 20121102A. This implies that the synchrotron maser emission in FRB 20121102A is triggered in different plasma blobs with varying $谓_{\rm P}$, likely due to the inhomogeneity of relativistic electron number density. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11103v1-abstract-full').style.display = 'none'; document.getElementById('2502.11103v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by A&A</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11047">arXiv:2502.11047</a> <span> [<a href="https://arxiv.org/pdf/2502.11047">pdf</a>, <a href="https://arxiv.org/ps/2502.11047">ps</a>, <a href="https://arxiv.org/format/2502.11047">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="High Energy Physics - Experiment">hep-ex</span> </div> </div> <p class="title is-5 mathjax"> Search for the Cabibbo-suppressed decays $螞_c^{+}\to危^0K^{+}蟺^{0}$ and $螞_c^{+}\to危^0K^{+}蟺^{+}蟺^{-}$ </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=BESIII+Collaboration"> BESIII Collaboration</a>, <a href="/search/?searchtype=author&query=Ablikim%2C+M">M. Ablikim</a>, <a href="/search/?searchtype=author&query=Achasov%2C+M+N">M. N. Achasov</a>, <a href="/search/?searchtype=author&query=Adlarson%2C+P">P. Adlarson</a>, <a href="/search/?searchtype=author&query=Ai%2C+X+C">X. C. Ai</a>, <a href="/search/?searchtype=author&query=Aliberti%2C+R">R. Aliberti</a>, <a href="/search/?searchtype=author&query=Amoroso%2C+A">A. Amoroso</a>, <a href="/search/?searchtype=author&query=An%2C+Q">Q. An</a>, <a href="/search/?searchtype=author&query=Bai%2C+Y">Y. Bai</a>, <a href="/search/?searchtype=author&query=Bakina%2C+O">O. Bakina</a>, <a href="/search/?searchtype=author&query=Ban%2C+Y">Y. Ban</a>, <a href="/search/?searchtype=author&query=Bao%2C+H+-">H. -R. Bao</a>, <a href="/search/?searchtype=author&query=Batozskaya%2C+V">V. Batozskaya</a>, <a href="/search/?searchtype=author&query=Begzsuren%2C+K">K. Begzsuren</a>, <a href="/search/?searchtype=author&query=Berger%2C+N">N. Berger</a>, <a href="/search/?searchtype=author&query=Berlowski%2C+M">M. Berlowski</a>, <a href="/search/?searchtype=author&query=Bertani%2C+M">M. Bertani</a>, <a href="/search/?searchtype=author&query=Bettoni%2C+D">D. Bettoni</a>, <a href="/search/?searchtype=author&query=Bianchi%2C+F">F. Bianchi</a>, <a href="/search/?searchtype=author&query=Bianco%2C+E">E. Bianco</a>, <a href="/search/?searchtype=author&query=Bortone%2C+A">A. Bortone</a>, <a href="/search/?searchtype=author&query=Boyko%2C+I">I. Boyko</a>, <a href="/search/?searchtype=author&query=Briere%2C+R+A">R. A. Briere</a>, <a href="/search/?searchtype=author&query=Brueggemann%2C+A">A. Brueggemann</a>, <a href="/search/?searchtype=author&query=Cai%2C+H">H. Cai</a> , et al. (687 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.11047v1-abstract-short" style="display: inline;"> Utilizing 4.5 $fb^-$ of $e^+e^-$ annihilation data collected at center-of-mass energies ranging from 4599.53 MeV to 4698.82 MeV by the BESIII detector at the BEPCII collider, we search for the singly Cabibbo-suppressed hadronic decays $螞_{c}^{+}\to危^{0} K^{+}蟺^{0}$ and $螞_{c}^{+}\to危^{0}K^{+}蟺^+蟺^-$ with a single-tag method. No significant signals are observed for both decays. The upper limits on… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11047v1-abstract-full').style.display = 'inline'; document.getElementById('2502.11047v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11047v1-abstract-full" style="display: none;"> Utilizing 4.5 $fb^-$ of $e^+e^-$ annihilation data collected at center-of-mass energies ranging from 4599.53 MeV to 4698.82 MeV by the BESIII detector at the BEPCII collider, we search for the singly Cabibbo-suppressed hadronic decays $螞_{c}^{+}\to危^{0} K^{+}蟺^{0}$ and $螞_{c}^{+}\to危^{0}K^{+}蟺^+蟺^-$ with a single-tag method. No significant signals are observed for both decays. The upper limits on the branching fractions at the $90\%$ confidence level are determined to be $5.0\times 10^{-4}$ for $螞_{c}^{+}\to危^{0} K^{+}蟺^{0}$ and $6.5\times 10^{-4}$ for $螞_c^{+}\to危^0K^{+}蟺^{+}蟺^{-}$. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11047v1-abstract-full').style.display = 'none'; document.getElementById('2502.11047v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 6 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.10981">arXiv:2502.10981</a> <span> [<a href="https://arxiv.org/pdf/2502.10981">pdf</a>, <a href="https://arxiv.org/format/2502.10981">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Combinatorics">math.CO</span> </div> </div> <p class="title is-5 mathjax"> Minimum forcing numbers of perfect matchings of circular and prismatic graphs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Shi%2C+Q">Qiaoyun Shi</a>, <a href="/search/?searchtype=author&query=Zhang%2C+H">Heping Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.10981v1-abstract-short" style="display: inline;"> Let $G$ be a graph with a perfect matching. Denote by $f(G)$ the minimum size of a matching in $G$ which is uniquely extendable to a perfect matching in $G$. Diwan (2019) proved by linear algebra that for $d$-hypercube $Q_d$ ($d\geq 2)$, $f(Q_n)=2^{d-2}$, settling a conjecture proposed by Pachter and Kim in 1998. Recently Mohammadian generalized this method to obtain a general result: for a bipart… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10981v1-abstract-full').style.display = 'inline'; document.getElementById('2502.10981v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.10981v1-abstract-full" style="display: none;"> Let $G$ be a graph with a perfect matching. Denote by $f(G)$ the minimum size of a matching in $G$ which is uniquely extendable to a perfect matching in $G$. Diwan (2019) proved by linear algebra that for $d$-hypercube $Q_d$ ($d\geq 2)$, $f(Q_n)=2^{d-2}$, settling a conjecture proposed by Pachter and Kim in 1998. Recently Mohammadian generalized this method to obtain a general result: for a bipartite graph $G$ on $n$ vertices, if there exists an involutory matrix $A$ on a field $F$ as a weighted adjacency matrix then $f(G\Box K_2)=\frac{n}{2}$. In this paper, under the same condition we obtain $f(G\Box C_{2k})=n ~(k\ge2)$. Also this method can be applied to some non-balanced bipartite graphs $G$ whenever $G$ admit a weighted bi-adjacency matrix with orthogonal rows. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10981v1-abstract-full').style.display = 'none'; document.getElementById('2502.10981v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages, 4figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 05C70 05C50 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.10959">arXiv:2502.10959</a> <span> [<a href="https://arxiv.org/pdf/2502.10959">pdf</a>, <a href="https://arxiv.org/format/2502.10959">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Databases">cs.DB</span> </div> </div> <p class="title is-5 mathjax"> Revisiting the Design of In-Memory Dynamic Graph Storage </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Su%2C+J">Jixian Su</a>, <a href="/search/?searchtype=author&query=Hao%2C+C">Chiyu Hao</a>, <a href="/search/?searchtype=author&query=Sun%2C+S">Shixuan Sun</a>, <a href="/search/?searchtype=author&query=Zhang%2C+H">Hao Zhang</a>, <a href="/search/?searchtype=author&query=Gao%2C+S">Sen Gao</a>, <a href="/search/?searchtype=author&query=Jiang%2C+J">Jiaxin Jiang</a>, <a href="/search/?searchtype=author&query=Chen%2C+Y">Yao Chen</a>, <a href="/search/?searchtype=author&query=Zhang%2C+C">Chenyi Zhang</a>, <a href="/search/?searchtype=author&query=He%2C+B">Bingsheng He</a>, <a href="/search/?searchtype=author&query=Guo%2C+M">Minyi Guo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.10959v1-abstract-short" style="display: inline;"> The effectiveness of in-memory dynamic graph storage (DGS) for supporting concurrent graph read and write queries is crucial for real-time graph analytics and updates. Various methods have been proposed, for example, LLAMA, Aspen, LiveGraph, Teseo, and Sortledton. These approaches differ significantly in their support for read and write operations, space overhead, and concurrency control. However,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10959v1-abstract-full').style.display = 'inline'; document.getElementById('2502.10959v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.10959v1-abstract-full" style="display: none;"> The effectiveness of in-memory dynamic graph storage (DGS) for supporting concurrent graph read and write queries is crucial for real-time graph analytics and updates. Various methods have been proposed, for example, LLAMA, Aspen, LiveGraph, Teseo, and Sortledton. These approaches differ significantly in their support for read and write operations, space overhead, and concurrency control. However, there has been no systematic study to explore the trade-offs among these dimensions. In this paper, we evaluate the effectiveness of individual techniques and identify the performance factors affecting these storage methods by proposing a common abstraction for DGS design and implementing a generic test framework based on this abstraction. Our findings highlight several key insights: 1) Existing DGS methods exhibit substantial space overhead. For example, Aspen consumes 3.3-10.8x more memory than CSR, while the optimal fine-grained methods consume 4.1-8.9x more memory than CSR, indicating a significant memory overhead. 2) Existing methods often overlook memory access impact of modern architectures, leading to performance degradation compared to continuous storage methods. 3) Fine-grained concurrency control methods, in particular, suffer from severe efficiency and space issues due to maintaining versions and performing checks for each neighbor. These methods also experience significant contention on high-degree vertices. Our systematic study reveals these performance bottlenecks and outlines future directions to improve DGS for real-time graph analytics. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10959v1-abstract-full').style.display = 'none'; document.getElementById('2502.10959v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.10505">arXiv:2502.10505</a> <span> [<a href="https://arxiv.org/pdf/2502.10505">pdf</a>, <a href="https://arxiv.org/format/2502.10505">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Preference learning made easy: Everything should be understood through win rate </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Zhang%2C+L+H">Lily H. Zhang</a>, <a href="/search/?searchtype=author&query=Ranganath%2C+R">Rajesh Ranganath</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.10505v1-abstract-short" style="display: inline;"> Preference learning, or the task of aligning generative models to preference comparison data, has yet to reach the conceptual maturity of classification, density estimation, etc. To close this gap, this work presents a framework to understand preference learning starting from the sampling distribution of pairwise preference data. First, we prove that the only evaluation of a generative model that… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10505v1-abstract-full').style.display = 'inline'; document.getElementById('2502.10505v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.10505v1-abstract-full" style="display: none;"> Preference learning, or the task of aligning generative models to preference comparison data, has yet to reach the conceptual maturity of classification, density estimation, etc. To close this gap, this work presents a framework to understand preference learning starting from the sampling distribution of pairwise preference data. First, we prove that the only evaluation of a generative model that respects both preferences and prevalences in the data distribution is a form of win rate, justifying win rate as the focal point to understand preference learning. We then analyze preference learning methods as win rate optimization (WRO) or non-WRO. We present novel instances of WRO beyond existing examples (RLHF, NLHF) and identify two key theoretical benefits of all such methods. We prove that common non-WRO methods like DPO and SFT on preferred samples lack these properties and suggest ways to mitigate such theoretical limitations. We also show that WRO underperforms in practice due optimization difficulties and that optimization success predicts performance better than choices which affect the objective's solution. Our analysis highlights best practices for existing methods and provides recommendations for future research, guided by the principle that one should either align non-WRO methods more closely with WRO or improve the optimization of WRO objectives. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10505v1-abstract-full').style.display = 'none'; document.getElementById('2502.10505v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.10407">arXiv:2502.10407</a> <span> [<a href="https://arxiv.org/pdf/2502.10407">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1016/j.im.2025.104103">10.1016/j.im.2025.104103 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Addressing Bias in Generative AI: Challenges and Research Opportunities in Information Management </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Wei%2C+X">Xiahua Wei</a>, <a href="/search/?searchtype=author&query=Kumar%2C+N">Naveen Kumar</a>, <a href="/search/?searchtype=author&query=Zhang%2C+H">Han Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.10407v1-abstract-short" style="display: inline;"> Generative AI technologies, particularly Large Language Models (LLMs), have transformed information management systems but introduced substantial biases that can compromise their effectiveness in informing business decision-making. This challenge presents information management scholars with a unique opportunity to advance the field by identifying and addressing these biases across extensive appli… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10407v1-abstract-full').style.display = 'inline'; document.getElementById('2502.10407v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.10407v1-abstract-full" style="display: none;"> Generative AI technologies, particularly Large Language Models (LLMs), have transformed information management systems but introduced substantial biases that can compromise their effectiveness in informing business decision-making. This challenge presents information management scholars with a unique opportunity to advance the field by identifying and addressing these biases across extensive applications of LLMs. Building on the discussion on bias sources and current methods for detecting and mitigating bias, this paper seeks to identify gaps and opportunities for future research. By incorporating ethical considerations, policy implications, and sociotechnical perspectives, we focus on developing a framework that covers major stakeholders of Generative AI systems, proposing key research questions, and inspiring discussion. Our goal is to provide actionable pathways for researchers to address bias in LLM applications, thereby advancing research in information management that ultimately informs business practices. Our forward-looking framework and research agenda advocate interdisciplinary approaches, innovative methods, dynamic perspectives, and rigorous evaluation to ensure fairness and transparency in Generative AI-driven information systems. We expect this study to serve as a call to action for information management scholars to tackle this critical issue, guiding the improvement of fairness and effectiveness in LLM-based systems for business practice. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10407v1-abstract-full').style.display = 'none'; document.getElementById('2502.10407v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Information & Management, forthcoming</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.10391">arXiv:2502.10391</a> <span> [<a href="https://arxiv.org/pdf/2502.10391">pdf</a>, <a href="https://arxiv.org/format/2502.10391">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> MM-RLHF: The Next Step Forward in Multimodal LLM Alignment </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Zhang%2C+Y">Yi-Fan Zhang</a>, <a href="/search/?searchtype=author&query=Yu%2C+T">Tao Yu</a>, <a href="/search/?searchtype=author&query=Tian%2C+H">Haochen Tian</a>, <a href="/search/?searchtype=author&query=Fu%2C+C">Chaoyou Fu</a>, <a href="/search/?searchtype=author&query=Li%2C+P">Peiyan Li</a>, <a href="/search/?searchtype=author&query=Zeng%2C+J">Jianshu Zeng</a>, <a href="/search/?searchtype=author&query=Xie%2C+W">Wulin Xie</a>, <a href="/search/?searchtype=author&query=Shi%2C+Y">Yang Shi</a>, <a href="/search/?searchtype=author&query=Zhang%2C+H">Huanyu Zhang</a>, <a href="/search/?searchtype=author&query=Wu%2C+J">Junkang Wu</a>, <a href="/search/?searchtype=author&query=Wang%2C+X">Xue Wang</a>, <a href="/search/?searchtype=author&query=Hu%2C+Y">Yibo Hu</a>, <a href="/search/?searchtype=author&query=Wen%2C+B">Bin Wen</a>, <a href="/search/?searchtype=author&query=Yang%2C+F">Fan Yang</a>, <a href="/search/?searchtype=author&query=Zhang%2C+Z">Zhang Zhang</a>, <a href="/search/?searchtype=author&query=Gao%2C+T">Tingting Gao</a>, <a href="/search/?searchtype=author&query=Zhang%2C+D">Di Zhang</a>, <a href="/search/?searchtype=author&query=Wang%2C+L">Liang Wang</a>, <a href="/search/?searchtype=author&query=Jin%2C+R">Rong Jin</a>, <a href="/search/?searchtype=author&query=Tan%2C+T">Tieniu Tan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.10391v1-abstract-short" style="display: inline;"> Despite notable advancements in Multimodal Large Language Models (MLLMs), most state-of-the-art models have not undergone thorough alignment with human preferences. This gap exists because current alignment research has primarily achieved progress in specific areas (e.g., hallucination reduction), while the broader question of whether aligning models with human preferences can systematically enhan… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10391v1-abstract-full').style.display = 'inline'; document.getElementById('2502.10391v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.10391v1-abstract-full" style="display: none;"> Despite notable advancements in Multimodal Large Language Models (MLLMs), most state-of-the-art models have not undergone thorough alignment with human preferences. This gap exists because current alignment research has primarily achieved progress in specific areas (e.g., hallucination reduction), while the broader question of whether aligning models with human preferences can systematically enhance MLLM capability remains largely unexplored. To this end, we introduce MM-RLHF, a dataset containing $\mathbf{120k}$ fine-grained, human-annotated preference comparison pairs. This dataset represents a substantial advancement over existing resources, offering superior size, diversity, annotation granularity, and quality. Leveraging this dataset, we propose several key innovations to improve both the quality of reward models and the efficiency of alignment algorithms. Notably, we introduce a Critique-Based Reward Model, which generates critiques of model outputs before assigning scores, offering enhanced interpretability and more informative feedback compared to traditional scalar reward mechanisms. Additionally, we propose Dynamic Reward Scaling, a method that adjusts the loss weight of each sample according to the reward signal, thereby optimizing the use of high-quality comparison pairs. Our approach is rigorously evaluated across $\mathbf{10}$ distinct dimensions and $\mathbf{27}$ benchmarks, with results demonstrating significant and consistent improvements in model performance. Specifically, fine-tuning LLaVA-ov-7B with MM-RLHF and our alignment algorithm leads to a $\mathbf{19.5}$% increase in conversational abilities and a $\mathbf{60}$% improvement in safety. We have open-sourced the preference dataset, reward model, training and evaluation code, as well as reward modeling and safety benchmarks. For more details, please visit our project page: https://mm-rlhf.github.io. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10391v1-abstract-full').style.display = 'none'; document.getElementById('2502.10391v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Project Page: https://mm-rlhf.github.io/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09560">arXiv:2502.09560</a> <span> [<a href="https://arxiv.org/pdf/2502.09560">pdf</a>, <a href="https://arxiv.org/format/2502.09560">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> EmbodiedBench: Comprehensive Benchmarking Multi-modal Large Language Models for Vision-Driven Embodied Agents </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Yang%2C+R">Rui Yang</a>, <a href="/search/?searchtype=author&query=Chen%2C+H">Hanyang Chen</a>, <a href="/search/?searchtype=author&query=Zhang%2C+J">Junyu Zhang</a>, <a href="/search/?searchtype=author&query=Zhao%2C+M">Mark Zhao</a>, <a href="/search/?searchtype=author&query=Qian%2C+C">Cheng Qian</a>, <a href="/search/?searchtype=author&query=Wang%2C+K">Kangrui Wang</a>, <a href="/search/?searchtype=author&query=Wang%2C+Q">Qineng Wang</a>, <a href="/search/?searchtype=author&query=Koripella%2C+T+V">Teja Venkat Koripella</a>, <a href="/search/?searchtype=author&query=Movahedi%2C+M">Marziyeh Movahedi</a>, <a href="/search/?searchtype=author&query=Li%2C+M">Manling Li</a>, <a href="/search/?searchtype=author&query=Ji%2C+H">Heng Ji</a>, <a href="/search/?searchtype=author&query=Zhang%2C+H">Huan Zhang</a>, <a href="/search/?searchtype=author&query=Zhang%2C+T">Tong Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09560v1-abstract-short" style="display: inline;"> Leveraging Multi-modal Large Language Models (MLLMs) to create embodied agents offers a promising avenue for tackling real-world tasks. While language-centric embodied agents have garnered substantial attention, MLLM-based embodied agents remain underexplored due to the lack of comprehensive evaluation frameworks. To bridge this gap, we introduce EmbodiedBench, an extensive benchmark designed to e… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09560v1-abstract-full').style.display = 'inline'; document.getElementById('2502.09560v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09560v1-abstract-full" style="display: none;"> Leveraging Multi-modal Large Language Models (MLLMs) to create embodied agents offers a promising avenue for tackling real-world tasks. While language-centric embodied agents have garnered substantial attention, MLLM-based embodied agents remain underexplored due to the lack of comprehensive evaluation frameworks. To bridge this gap, we introduce EmbodiedBench, an extensive benchmark designed to evaluate vision-driven embodied agents. EmbodiedBench features: (1) a diverse set of 1,128 testing tasks across four environments, ranging from high-level semantic tasks (e.g., household) to low-level tasks involving atomic actions (e.g., navigation and manipulation); and (2) six meticulously curated subsets evaluating essential agent capabilities like commonsense reasoning, complex instruction understanding, spatial awareness, visual perception, and long-term planning. Through extensive experiments, we evaluated 13 leading proprietary and open-source MLLMs within EmbodiedBench. Our findings reveal that: MLLMs excel at high-level tasks but struggle with low-level manipulation, with the best model, GPT-4o, scoring only 28.9% on average. EmbodiedBench provides a multifaceted standardized evaluation platform that not only highlights existing challenges but also offers valuable insights to advance MLLM-based embodied agents. Our code is available at https://embodiedbench.github.io. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09560v1-abstract-full').style.display = 'none'; document.getElementById('2502.09560v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">51 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09430">arXiv:2502.09430</a> <span> [<a href="https://arxiv.org/pdf/2502.09430">pdf</a>, <a href="https://arxiv.org/ps/2502.09430">ps</a>, <a href="https://arxiv.org/format/2502.09430">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantum Algebra">math.QA</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Rings and Algebras">math.RA</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Representation Theory">math.RT</span> </div> </div> <p class="title is-5 mathjax"> Analogue of Feigin's map on $\imath$quantum group of split type </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Lu%2C+M">Ming Lu</a>, <a href="/search/?searchtype=author&query=Ruan%2C+S">Shiquan Ruan</a>, <a href="/search/?searchtype=author&query=Zhang%2C+H">Haicheng Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09430v1-abstract-short" style="display: inline;"> The (universal) $\imath$quantum groups are as a vast generalization of (Drinfeld double) quantum groups. We establish an algebra homomorphism from universal $\imath$quantum group of split type to a certain quantum torus, which can be viewed as an $\imath$analogue of Feigin's map on the quantum group. </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09430v1-abstract-full" style="display: none;"> The (universal) $\imath$quantum groups are as a vast generalization of (Drinfeld double) quantum groups. We establish an algebra homomorphism from universal $\imath$quantum group of split type to a certain quantum torus, which can be viewed as an $\imath$analogue of Feigin's map on the quantum group. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09430v1-abstract-full').style.display = 'none'; document.getElementById('2502.09430v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">31 pages</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 17B37; 05E10; 17B67 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09399">arXiv:2502.09399</a> <span> [<a href="https://arxiv.org/pdf/2502.09399">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Applied Physics">physics.app-ph</span> </div> </div> <p class="title is-5 mathjax"> Nondestructive quantitative estimation of cross-sectional corrosion degree of rebar using self-magnetic flux leakage field variation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Qiu%2C+J">Junli Qiu</a>, <a href="/search/?searchtype=author&query=Zhang%2C+W">Weiping Zhang</a>, <a href="/search/?searchtype=author&query=Jiang%2C+C">Chao Jiang</a>, <a href="/search/?searchtype=author&query=Zhang%2C+H">Hong Zhang</a>, <a href="/search/?searchtype=author&query=Zhou%2C+J">Jianting Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09399v1-abstract-short" style="display: inline;"> To accurately assess the structural performance of corroded reinforced concrete structures, obtaining precise information on the corrosion range and corrosion degree of rebar is crucial. In this study, based on a comprehensive analysis of extensive high-precision magnetic field and three-dimensional structural light scanning data of 21 corroded rebars, it was found that the self-magnetic flux leak… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09399v1-abstract-full').style.display = 'inline'; document.getElementById('2502.09399v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09399v1-abstract-full" style="display: none;"> To accurately assess the structural performance of corroded reinforced concrete structures, obtaining precise information on the corrosion range and corrosion degree of rebar is crucial. In this study, based on a comprehensive analysis of extensive high-precision magnetic field and three-dimensional structural light scanning data of 21 corroded rebars, it was found that the self-magnetic flux leakage can accurately identify the corrosion range, with an error not exceeding 3%. A proposed quantitative index NHx of the self-magnetic flux leakage amplitude exhibits a linear correlation with the cross-sectional corrosion degree of rebar, whose probability density distribution can be accurately described using the Weibull distribution function. Utilizing the Weibull distribution function of NHx and a Bayesian model, automatically rapid quantification of the rebar's cross-sectional corrosion degree based on the non-destructive testing-derived NHx values can be conveniently realized. This self-magnetic flux leakage-based novel method for quantifying rebar's cross-sectional corrosion degree is accurate, efficient, and well-suited for practical engineering applications, providing robust support for a precise assessment of the structural performance of corroded reinforced concrete structures. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09399v1-abstract-full').style.display = 'none'; document.getElementById('2502.09399v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09392">arXiv:2502.09392</a> <span> [<a href="https://arxiv.org/pdf/2502.09392">pdf</a>, <a href="https://arxiv.org/ps/2502.09392">ps</a>, <a href="https://arxiv.org/format/2502.09392">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Other Quantitative Biology">q-bio.OT</span> </div> </div> <p class="title is-5 mathjax"> Conditional Success of Adaptive Therapy: The Role of Treatment-Holiday Thresholds Revealed by Mathematical Modeling </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Sun%2C+L">Lanfei Sun</a>, <a href="/search/?searchtype=author&query=Zhang%2C+H">Haifeng Zhang</a>, <a href="/search/?searchtype=author&query=Kang%2C+K">Kai Kang</a>, <a href="/search/?searchtype=author&query=Wang%2C+X">Xiaoxin Wang</a>, <a href="/search/?searchtype=author&query=Zhang%2C+L">Leyi Zhang</a>, <a href="/search/?searchtype=author&query=Cai%2C+Y">Yanan Cai</a>, <a href="/search/?searchtype=author&query=Zhuge%2C+C">Changjing Zhuge</a>, <a href="/search/?searchtype=author&query=Zhang%2C+L">Lei Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09392v2-abstract-short" style="display: inline;"> Adaptive therapy (AT) improves cancer treatment by controlling the competition between sensitive and resistant cells through treatment holidays. This study highlights the critical role of treatment-holiday thresholds in AT for tumors composed of drug-sensitive and resistant cells. Using a Lotka-Volterra model, the research compares AT with maximum tolerated dose therapy and intermittent therapy, s… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09392v2-abstract-full').style.display = 'inline'; document.getElementById('2502.09392v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09392v2-abstract-full" style="display: none;"> Adaptive therapy (AT) improves cancer treatment by controlling the competition between sensitive and resistant cells through treatment holidays. This study highlights the critical role of treatment-holiday thresholds in AT for tumors composed of drug-sensitive and resistant cells. Using a Lotka-Volterra model, the research compares AT with maximum tolerated dose therapy and intermittent therapy, showing that AT's success largely depends on the threshold at which treatment is paused and resumed, as well as on the competition between sensitive and resistant cells. Three scenarios of comparison between AT and other therapies are identified: uniform-decline, conditional-improve, and uniform-improve, illustrating that optimizing the treatment-holiday threshold is crucial for AT effectiveness. Tumor composition, including initial tumor burden and the proportion of resistant cells, influences outcomes. Adjusting threshold values enables AT to suppress resistant subclones, preserving sensitive cells, ultimately improving progression-free survival. These findings emphasize the importance of personalized treatment strategies potentially enhancing long-term therapeutic outcomes. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09392v2-abstract-full').style.display = 'none'; document.getElementById('2502.09392v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 92-10 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09268">arXiv:2502.09268</a> <span> [<a href="https://arxiv.org/pdf/2502.09268">pdf</a>, <a href="https://arxiv.org/format/2502.09268">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> GEVRM: Goal-Expressive Video Generation Model For Robust Visual Manipulation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Zhang%2C+H">Hongyin Zhang</a>, <a href="/search/?searchtype=author&query=Ding%2C+P">Pengxiang Ding</a>, <a href="/search/?searchtype=author&query=Lyu%2C+S">Shangke Lyu</a>, <a href="/search/?searchtype=author&query=Peng%2C+Y">Ying Peng</a>, <a href="/search/?searchtype=author&query=Wang%2C+D">Donglin Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09268v2-abstract-short" style="display: inline;"> With the rapid development of embodied artificial intelligence, significant progress has been made in vision-language-action (VLA) models for general robot decision-making. However, the majority of existing VLAs fail to account for the inevitable external perturbations encountered during deployment. These perturbations introduce unforeseen state information to the VLA, resulting in inaccurate acti… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09268v2-abstract-full').style.display = 'inline'; document.getElementById('2502.09268v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09268v2-abstract-full" style="display: none;"> With the rapid development of embodied artificial intelligence, significant progress has been made in vision-language-action (VLA) models for general robot decision-making. However, the majority of existing VLAs fail to account for the inevitable external perturbations encountered during deployment. These perturbations introduce unforeseen state information to the VLA, resulting in inaccurate actions and consequently, a significant decline in generalization performance. The classic internal model control (IMC) principle demonstrates that a closed-loop system with an internal model that includes external input signals can accurately track the reference input and effectively offset the disturbance. We propose a novel closed-loop VLA method GEVRM that integrates the IMC principle to enhance the robustness of robot visual manipulation. The text-guided video generation model in GEVRM can generate highly expressive future visual planning goals. Simultaneously, we evaluate perturbations by simulating responses, which are called internal embeddings and optimized through prototype contrastive learning. This allows the model to implicitly infer and distinguish perturbations from the external environment. The proposed GEVRM achieves state-of-the-art performance on both standard and perturbed CALVIN benchmarks and shows significant improvements in realistic robot tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09268v2-abstract-full').style.display = 'none'; document.getElementById('2502.09268v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published as a conference paper at ICLR 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09106">arXiv:2502.09106</a> <span> [<a href="https://arxiv.org/pdf/2502.09106">pdf</a>, <a href="https://arxiv.org/format/2502.09106">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Scaling Law for Stochastic Gradient Descent in Quadratically Parameterized Linear Regression </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Ding%2C+S">Shihong Ding</a>, <a href="/search/?searchtype=author&query=Zhang%2C+H">Haihan Zhang</a>, <a href="/search/?searchtype=author&query=Zhao%2C+H">Hanzhen Zhao</a>, <a href="/search/?searchtype=author&query=Fang%2C+C">Cong Fang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09106v1-abstract-short" style="display: inline;"> In machine learning, the scaling law describes how the model performance improves with the model and data size scaling up. From a learning theory perspective, this class of results establishes upper and lower generalization bounds for a specific learning algorithm. Here, the exact algorithm running using a specific model parameterization often offers a crucial implicit regularization effect, leadi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09106v1-abstract-full').style.display = 'inline'; document.getElementById('2502.09106v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09106v1-abstract-full" style="display: none;"> In machine learning, the scaling law describes how the model performance improves with the model and data size scaling up. From a learning theory perspective, this class of results establishes upper and lower generalization bounds for a specific learning algorithm. Here, the exact algorithm running using a specific model parameterization often offers a crucial implicit regularization effect, leading to good generalization. To characterize the scaling law, previous theoretical studies mainly focus on linear models, whereas, feature learning, a notable process that contributes to the remarkable empirical success of neural networks, is regretfully vacant. This paper studies the scaling law over a linear regression with the model being quadratically parameterized. We consider infinitely dimensional data and slope ground truth, both signals exhibiting certain power-law decay rates. We study convergence rates for Stochastic Gradient Descent and demonstrate the learning rates for variables will automatically adapt to the ground truth. As a result, in the canonical linear regression, we provide explicit separations for generalization curves between SGD with and without feature learning, and the information-theoretical lower bound that is agnostic to parametrization method and the algorithm. Our analysis for decaying ground truth provides a new characterization for the learning dynamic of the model. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09106v1-abstract-full').style.display = 'none'; document.getElementById('2502.09106v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09047">arXiv:2502.09047</a> <span> [<a href="https://arxiv.org/pdf/2502.09047">pdf</a>, <a href="https://arxiv.org/ps/2502.09047">ps</a>, <a href="https://arxiv.org/format/2502.09047">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Optimal Algorithms in Linear Regression under Covariate Shift: On the Importance of Precondition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Liu%2C+Y">Yuanshi Liu</a>, <a href="/search/?searchtype=author&query=Zhang%2C+H">Haihan Zhang</a>, <a href="/search/?searchtype=author&query=Chen%2C+Q">Qian Chen</a>, <a href="/search/?searchtype=author&query=Fang%2C+C">Cong Fang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09047v1-abstract-short" style="display: inline;"> A common pursuit in modern statistical learning is to attain satisfactory generalization out of the source data distribution (OOD). In theory, the challenge remains unsolved even under the canonical setting of covariate shift for the linear model. This paper studies the foundational (high-dimensional) linear regression where the ground truth variables are confined to an ellipse-shape constraint an… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09047v1-abstract-full').style.display = 'inline'; document.getElementById('2502.09047v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09047v1-abstract-full" style="display: none;"> A common pursuit in modern statistical learning is to attain satisfactory generalization out of the source data distribution (OOD). In theory, the challenge remains unsolved even under the canonical setting of covariate shift for the linear model. This paper studies the foundational (high-dimensional) linear regression where the ground truth variables are confined to an ellipse-shape constraint and addresses two fundamental questions in this regime: (i) given the target covariate matrix, what is the min-max \emph{optimal} algorithm under covariate shift? (ii) for what kinds of target classes, the commonly-used SGD-type algorithms achieve optimality? Our analysis starts with establishing a tight lower generalization bound via a Bayesian Cramer-Rao inequality. For (i), we prove that the optimal estimator can be simply a certain linear transformation of the best estimator for the source distribution. Given the source and target matrices, we show that the transformation can be efficiently computed via a convex program. The min-max optimal analysis for SGD leverages the idea that we recognize both the accumulated updates of the applied algorithms and the ideal transformation as preconditions on the learning variables. We provide sufficient conditions when SGD with its acceleration variants attain optimality. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09047v1-abstract-full').style.display = 'none'; document.getElementById('2502.09047v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.08991">arXiv:2502.08991</a> <span> [<a href="https://arxiv.org/pdf/2502.08991">pdf</a>, <a href="https://arxiv.org/format/2502.08991">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Task Generalization With AutoRegressive Compositional Structure: Can Learning From $\d$ Tasks Generalize to $\d^{T}$ Tasks? </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Abedsoltan%2C+A">Amirhesam Abedsoltan</a>, <a href="/search/?searchtype=author&query=Zhang%2C+H">Huaqing Zhang</a>, <a href="/search/?searchtype=author&query=Wen%2C+K">Kaiyue Wen</a>, <a href="/search/?searchtype=author&query=Lin%2C+H">Hongzhou Lin</a>, <a href="/search/?searchtype=author&query=Zhang%2C+J">Jingzhao Zhang</a>, <a href="/search/?searchtype=author&query=Belkin%2C+M">Mikhail Belkin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.08991v1-abstract-short" style="display: inline;"> Large language models (LLMs) exhibit remarkable task generalization, solving tasks they were never explicitly trained on with only a few demonstrations. This raises a fundamental question: When can learning from a small set of tasks generalize to a large task family? In this paper, we investigate task generalization through the lens of AutoRegressive Compositional (ARC) structure, where each task… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08991v1-abstract-full').style.display = 'inline'; document.getElementById('2502.08991v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.08991v1-abstract-full" style="display: none;"> Large language models (LLMs) exhibit remarkable task generalization, solving tasks they were never explicitly trained on with only a few demonstrations. This raises a fundamental question: When can learning from a small set of tasks generalize to a large task family? In this paper, we investigate task generalization through the lens of AutoRegressive Compositional (ARC) structure, where each task is a composition of $T$ operations, and each operation is among a finite family of $\d$ subtasks. This yields a total class of size~$ \d^\TT $. We first show that generalization to all $ \d^\TT $ tasks is theoretically achievable by training on only $ \tilde{O}(\d) $ tasks. Empirically, we demonstrate that Transformers achieve such exponential task generalization on sparse parity functions via in-context learning (ICL) and Chain-of-Thought (CoT) reasoning. We further demonstrate this generalization in arithmetic and language translation, extending beyond parity functions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08991v1-abstract-full').style.display = 'none'; document.getElementById('2502.08991v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.08975">arXiv:2502.08975</a> <span> [<a href="https://arxiv.org/pdf/2502.08975">pdf</a>, <a href="https://arxiv.org/format/2502.08975">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> </div> </div> <p class="title is-5 mathjax"> Small Molecule Drug Discovery Through Deep Learning:Progress, Challenges, and Opportunities </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Li%2C+K">Kun Li</a>, <a href="/search/?searchtype=author&query=Xiong%2C+Y">Yida Xiong</a>, <a href="/search/?searchtype=author&query=Zhang%2C+H">Hongzhi Zhang</a>, <a href="/search/?searchtype=author&query=Cai%2C+X">Xiantao Cai</a>, <a href="/search/?searchtype=author&query=Du%2C+B">Bo Du</a>, <a href="/search/?searchtype=author&query=Hu%2C+W">Wenbin Hu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.08975v1-abstract-short" style="display: inline;"> Due to their excellent drug-like and pharmacokinetic properties, small molecule drugs are widely used to treat various diseases, making them a critical component of drug discovery. In recent years, with the rapid development of deep learning (DL) techniques, DL-based small molecule drug discovery methods have achieved excellent performance in prediction accuracy, speed, and complex molecular relat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08975v1-abstract-full').style.display = 'inline'; document.getElementById('2502.08975v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.08975v1-abstract-full" style="display: none;"> Due to their excellent drug-like and pharmacokinetic properties, small molecule drugs are widely used to treat various diseases, making them a critical component of drug discovery. In recent years, with the rapid development of deep learning (DL) techniques, DL-based small molecule drug discovery methods have achieved excellent performance in prediction accuracy, speed, and complex molecular relationship modeling compared to traditional machine learning approaches. These advancements enhance drug screening efficiency and optimization, and they provide more precise and effective solutions for various drug discovery tasks. Contributing to this field's development, this paper aims to systematically summarize and generalize the recent key tasks and representative techniques in DL-based small molecule drug discovery in recent years. Specifically, we provide an overview of the major tasks in small molecule drug discovery and their interrelationships. Next, we analyze the six core tasks, summarizing the related methods, commonly used datasets, and technological development trends. Finally, we discuss key challenges, such as interpretability and out-of-distribution generalization, and offer our insights into future research directions for DL-assisted small molecule drug discovery. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08975v1-abstract-full').style.display = 'none'; document.getElementById('2502.08975v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages, 1 figures, 8 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.08929">arXiv:2502.08929</a> <span> [<a href="https://arxiv.org/pdf/2502.08929">pdf</a>, <a href="https://arxiv.org/ps/2502.08929">ps</a>, <a href="https://arxiv.org/format/2502.08929">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="High Energy Physics - Experiment">hep-ex</span> </div> </div> <p class="title is-5 mathjax"> Precise Measurement of the $蠂_{c0}$ Resonance Parameters and Branching Fractions of $蠂_{c0,c2}\to蟺^+蟺^-/K^+K^-$ </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=BESIII+Collaboration"> BESIII Collaboration</a>, <a href="/search/?searchtype=author&query=Ablikim%2C+M">M. Ablikim</a>, <a href="/search/?searchtype=author&query=Achasov%2C+M+N">M. N. Achasov</a>, <a href="/search/?searchtype=author&query=Adlarson%2C+P">P. Adlarson</a>, <a href="/search/?searchtype=author&query=Afedulidis%2C+O">O. Afedulidis</a>, <a href="/search/?searchtype=author&query=Ai%2C+X+C">X. C. Ai</a>, <a href="/search/?searchtype=author&query=Aliberti%2C+R">R. Aliberti</a>, <a href="/search/?searchtype=author&query=Amoroso%2C+A">A. Amoroso</a>, <a href="/search/?searchtype=author&query=Bai%2C+Y">Y. Bai</a>, <a href="/search/?searchtype=author&query=Bakina%2C+O">O. Bakina</a>, <a href="/search/?searchtype=author&query=Balossino%2C+I">I. Balossino</a>, <a href="/search/?searchtype=author&query=Ban%2C+Y">Y. Ban</a>, <a href="/search/?searchtype=author&query=Bao%2C+H+-">H. -R. Bao</a>, <a href="/search/?searchtype=author&query=Batozskaya%2C+V">V. Batozskaya</a>, <a href="/search/?searchtype=author&query=Begzsuren%2C+K">K. Begzsuren</a>, <a href="/search/?searchtype=author&query=Berger%2C+N">N. Berger</a>, <a href="/search/?searchtype=author&query=Berlowski%2C+M">M. Berlowski</a>, <a href="/search/?searchtype=author&query=Bertani%2C+M">M. Bertani</a>, <a href="/search/?searchtype=author&query=Bettoni%2C+D">D. Bettoni</a>, <a href="/search/?searchtype=author&query=Bianchi%2C+F">F. Bianchi</a>, <a href="/search/?searchtype=author&query=Bianco%2C+E">E. Bianco</a>, <a href="/search/?searchtype=author&query=Bortone%2C+A">A. Bortone</a>, <a href="/search/?searchtype=author&query=Boyko%2C+I">I. Boyko</a>, <a href="/search/?searchtype=author&query=Briere%2C+R+A">R. A. Briere</a>, <a href="/search/?searchtype=author&query=Brueggemann%2C+A">A. Brueggemann</a> , et al. (648 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.08929v1-abstract-short" style="display: inline;"> By analyzing a $蠄(3686)$ data sample containing $(107.7\pm0.6)\times10^{6}$ events taken with the BESIII detector at the BEPCII storage ring in 2009, the $蠂_{c0}$ resonance parameters are precisely measured using $蠂_{c0,c2} \to 蟺^+蟺^-/K^+K^-$ events. The mass of $蠂_{c0}$ is determined to be $M(蠂_{c0})=(3415.67\pm0.07\pm0.06\pm0.07$)~MeV/$c^2$, and its full width is… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08929v1-abstract-full').style.display = 'inline'; document.getElementById('2502.08929v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.08929v1-abstract-full" style="display: none;"> By analyzing a $蠄(3686)$ data sample containing $(107.7\pm0.6)\times10^{6}$ events taken with the BESIII detector at the BEPCII storage ring in 2009, the $蠂_{c0}$ resonance parameters are precisely measured using $蠂_{c0,c2} \to 蟺^+蟺^-/K^+K^-$ events. The mass of $蠂_{c0}$ is determined to be $M(蠂_{c0})=(3415.67\pm0.07\pm0.06\pm0.07$)~MeV/$c^2$, and its full width is $螕(蠂_{c0})=(12.44\pm0.12\pm0.12)~{\rm MeV}$, where the first uncertainty is statistical, the second systematic, and the third for mass comes from $蠂_{c2}$ mass uncertainty. These measurements improve the precision of $蠂_{c0}$ mass by a factor of four and width by one order of magnitude over the previous individual measurements, and significantly boost our knowledge about the charmonium spectrum. Together with additional $(345.4\pm2.6)\times10^{6}$ $蠄(3686)$ data events taken in 2012, the decay branching fractions of $蠂_{c0,c2}\to蟺^+蟺^-/K^+K^-$ are measured as well, with precision improved by a factor of three compared to previous measurements. These $蠂_{c0}$ decay branching fractions provide important inputs for the study of glueballs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08929v1-abstract-full').style.display = 'none'; document.getElementById('2502.08929v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages, 1 figure</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.08904">arXiv:2502.08904</a> <span> [<a href="https://arxiv.org/pdf/2502.08904">pdf</a>, <a href="https://arxiv.org/format/2502.08904">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> MIH-TCCT: Mitigating Inconsistent Hallucinations in LLMs via Event-Driven Text-Code Cyclic Training </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=You%2C+X">Xinxin You</a>, <a href="/search/?searchtype=author&query=Liu%2C+X">Xien Liu</a>, <a href="/search/?searchtype=author&query=Sun%2C+Q">Qixin Sun</a>, <a href="/search/?searchtype=author&query=Zhang%2C+H">Huan Zhang</a>, <a href="/search/?searchtype=author&query=Zhou%2C+K">Kaiyin Zhou</a>, <a href="/search/?searchtype=author&query=Liu%2C+S">Shaohui Liu</a>, <a href="/search/?searchtype=author&query=Hu%2C+G">GuoPing Hu</a>, <a href="/search/?searchtype=author&query=Wang%2C+S">ShiJin Wang</a>, <a href="/search/?searchtype=author&query=Liu%2C+S">Si Liu</a>, <a href="/search/?searchtype=author&query=Wu%2C+J">Ji Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.08904v2-abstract-short" style="display: inline;"> Recent methodologies utilizing synthetic datasets have aimed to address inconsistent hallucinations in large language models (LLMs); however,these approaches are primarily tailored to specific tasks, limiting their generalizability. Inspired by the strong performance of code-trained models in logic-intensive domains, we propose a novel framework that leverages event-based text to generate correspo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08904v2-abstract-full').style.display = 'inline'; document.getElementById('2502.08904v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.08904v2-abstract-full" style="display: none;"> Recent methodologies utilizing synthetic datasets have aimed to address inconsistent hallucinations in large language models (LLMs); however,these approaches are primarily tailored to specific tasks, limiting their generalizability. Inspired by the strong performance of code-trained models in logic-intensive domains, we propose a novel framework that leverages event-based text to generate corresponding code and employs cyclic training to transfer the logical consistency of code to natural language effectively. Our method significantly reduces inconsistent hallucinations across three leading LLMs and two categories of natural language tasks while maintaining overall performance. This framework effectively alleviates hallucinations without necessitating adaptation to downstream tasks, demonstrating generality and providing new perspectives to tackle the challenge of inconsistent hallucinations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08904v2-abstract-full').style.display = 'none'; document.getElementById('2502.08904v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Zhang%2C+H&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Zhang%2C+H&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Zhang%2C+H&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Zhang%2C+H&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Zhang%2C+H&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Zhang%2C+H&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">…</span></li> </ul> </nav> <div class="is-hidden-tablet">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary">  <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div>   <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div>  </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

CINXE.COM

Search | arXiv e-print repository