CINXE.COM

Search | arXiv e-print repository

<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <!-- new favicon config and versions by realfavicongenerator.net -->
  <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png">
  <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png">
  <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png">
  <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest">
  <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b">
  <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico">
  <meta name="msapplication-TileColor" content="#b31b1b">
  <meta name="msapplication-config" content="images/icons/browserconfig.xml">
  <meta name="theme-color" content="#b31b1b">
  <!-- end favicon config -->
  <title>Search | arXiv e-print repository</title>
  <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script>
  <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css">
  <!-- MathJax configuration; it must appear before the MathJax.js loader below. -->
  <script type="text/x-mathjax-config">
    MathJax.Hub.Config({
      messageStyle: "none",
      extensions: ["tex2jax.js"],
      jax: ["input/TeX", "output/HTML-CSS"],
      tex2jax: {
        inlineMath: [ ['$','$'], ["\\(","\\)"] ],
        displayMath: [ ['$$','$$'], ["\\[","\\]"] ],
        processEscapes: true,
        // NOTE(review): presumably this pair limits typesetting to elements whose
        // class starts with "mathjax"; confirm against the MathJax 2.7 tex2jax docs.
        ignoreClass: '.*',
        processClass: 'mathjax.*'
      },
      TeX: {
        extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"],
        noErrors: {
          inlineDelimiters: ["$","$"],
          multiLine: false,
          style: { "font-size": "normal", "border": "" }
        }
      },
      "HTML-CSS": { availableFonts: ["TeX"] }
    });
  </script>
  <!-- Explicit https: scheme (was protocol-relative), matching every other static.arxiv.org asset on this page. -->
  <script src="https://static.arxiv.org/MathJax-2.7.3/MathJax.js"></script>
  <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script>
  <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css">
  <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css">
  <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script>
  <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script>
  <style>
    /* NOTE(review): "radio" is not an HTML element name, so this selector matches
       nothing as written; confirm the intended target before changing it. */
    radio#cf-customfield_11400 { display: none; }
  </style>
</head>
<body>
<header><a href="#main-container" class="is-sr-only">Skip to main content</a>
  <!-- contains Cornell logo and sponsor statement -->
  <div class="attribution level is-marginless" role="banner">
    <div class="level-left">
      <!-- No aria-label on the image: it would override the alt text and name the link "logo". -->
      <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200"></a>
    </div>
    <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors.
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div>
  </div>
  <!-- contains arXiv identity and search bar -->
  <div class="identity level is-marginless">
    <div class="level-left">
      <div class="level-item">
        <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo">
          <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" alt="arxiv logo" width="85" style="width:85px;">
        </a>
      </div>
    </div>
    <div class="search-block level-right">
      <!-- Compact header search: sends query, searchtype and source=header via GET. -->
      <form class="level-item mini-search" method="GET" action="https://arxiv.org/search">
        <div class="field has-addons">
          <div class="control">
            <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms">
            <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p>
          </div>
          <div class="control">
            <div class="select is-small">
              <select name="searchtype" aria-label="Field to search">
                <option value="all" selected>All fields</option>
                <option value="title">Title</option>
                <option value="author">Author</option>
                <option value="abstract">Abstract</option>
                <option value="comments">Comments</option>
                <option value="journal_ref">Journal reference</option>
                <option value="acm_class">ACM classification</option>
                <option value="msc_class">MSC classification</option>
                <option value="report_num">Report number</option>
                <option value="paper_id">arXiv identifier</option>
                <option value="doi">DOI</option>
                <option value="orcid">ORCID</option>
                <option value="author_id">arXiv author ID</option>
                <option value="help">Help pages</option>
                <option value="full_text">Full text</option>
              </select>
            </div>
          </div>
          <input type="hidden" name="source" value="header">
          <button class="button is-small is-cul-darker" type="submit">Search</button>
        </div>
      </form>
    </div>
  </div> <!-- closes identity -->
  <div class="container">
    <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu">
      <a href="https://arxiv.org/login">Login</a>
    </div>
  </div>
</header>
<main class="container" id="main-container">
  <div class="level is-marginless">
    <div class="level-left">
      <h1 class="title is-clearfix"> Showing 1&ndash;50 of 4,899 results for author: <span class="mathjax">Li, H</span> </h1>
    </div>
    <div class="level-right is-hidden-mobile">
      <!-- feedback for mobile is moved to footer -->
      <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span>
    </div>
  </div>
  <div class="content">
    <!-- Main search form. role="search" (previously the non-existent attribute aria-role) exposes it as a search landmark. -->
    <form method="GET" action="/search/cs" role="search">
      Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&amp;query=Li%2C+H">Search in all archives.</a>
      <div class="field has-addons-tablet">
        <div class="control is-expanded">
          <label for="query" class="hidden-label">Search term or terms</label>
          <input class="input is-medium" id="query" name="query" placeholder="Search term..."
type="text" value="Li, H">
        </div>
        <div class="select control is-medium">
          <label class="is-hidden" for="searchtype">Field</label>
          <select class="is-medium" id="searchtype" name="searchtype">
            <option value="all">All fields</option>
            <option value="title">Title</option>
            <option selected value="author">Author(s)</option>
            <option value="abstract">Abstract</option>
            <option value="comments">Comments</option>
            <option value="journal_ref">Journal reference</option>
            <option value="acm_class">ACM classification</option>
            <option value="msc_class">MSC classification</option>
            <option value="report_num">Report number</option>
            <option value="paper_id">arXiv identifier</option>
            <option value="doi">DOI</option>
            <option value="orcid">ORCID</option>
            <option value="license">License (URI)</option>
            <option value="author_id">arXiv author ID</option>
            <option value="help">Help pages</option>
            <option value="full_text">Full text</option>
          </select>
        </div>
        <div class="control">
          <button class="button is-link is-medium" type="submit">Search</button>
        </div>
      </div>
      <div class="field">
        <div class="control is-size-7">
          <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label>
          <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label>
        </div>
      </div>
      <div class="is-clearfix" style="height: 2.5em">
        <div class="is-pulled-right">
          <a href="/search/advanced?terms-0-term=Li%2C+H&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a>
        </div>
      </div>
      <input type="hidden" name="order" value="-announced_date_first">
      <input type="hidden" name="size" value="50">
    </form>
    <div class="level breathe-horizontal">
      <div class="level-left">
        <!-- Page-size / sort-order form. -->
        <form method="GET" action="/search/">
          <!-- Non-displayed copies of the current search fields, submitted together with size and order.
               Their ids carry a "sort-" prefix because the main search form already uses query,
               searchtype, abstracts-0 and abstracts-1, and ids must be unique in a document.
               The name attributes, which are what gets submitted, are unchanged. -->
          <div style="display: none;">
            <select id="sort-searchtype" name="searchtype">
              <option value="all">All fields</option>
              <option value="title">Title</option>
              <option selected value="author">Author(s)</option>
              <option value="abstract">Abstract</option>
              <option value="comments">Comments</option>
              <option value="journal_ref">Journal reference</option>
              <option value="acm_class">ACM classification</option>
              <option value="msc_class">MSC classification</option>
              <option value="report_num">Report number</option>
              <option value="paper_id">arXiv identifier</option>
              <option value="doi">DOI</option>
              <option value="orcid">ORCID</option>
              <option value="license">License (URI)</option>
              <option value="author_id">arXiv author ID</option>
              <option value="help">Help pages</option>
              <option value="full_text">Full text</option>
            </select>
            <input id="sort-query" name="query" type="text" value="Li, H">
            <ul id="abstracts"><li><input checked id="sort-abstracts-0" name="abstracts" type="radio" value="show"> <label for="sort-abstracts-0">Show abstracts</label></li><li><input id="sort-abstracts-1" name="abstracts" type="radio" value="hide"> <label for="sort-abstracts-1">Hide abstracts</label></li></ul>
          </div>
          <div class="box field is-grouped is-grouped-multiline level-item">
            <div class="control">
              <span class="select is-small">
                <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select>
              </span>
              <label for="size">results per page</label>.
</div>
            <div class="control">
              <label for="order">Sort results by</label>
              <span class="select is-small">
                <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select>
              </span>
            </div>
            <div class="control">
              <button class="button is-small is-link" type="submit">Go</button>
            </div>
          </div>
        </form>
      </div>
    </div>
    <!-- Result pagination. Only the link for the page being shown (class is-current) carries
         aria-current="page"; the other links are labelled "Goto page N".
         NOTE(review): these hrefs point at /search/ while the search form above posts to /search/cs;
         confirm whether paging is meant to keep the archive filter. -->
    <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination">
      <a href="" class="pagination-previous is-invisible">Previous </a>
      <a href="/search/?searchtype=author&amp;query=Li%2C+H&amp;start=50" class="pagination-next" >Next </a>
      <ul class="pagination-list">
        <li> <a href="/search/?searchtype=author&amp;query=Li%2C+H&amp;start=0" class="pagination-link is-current" aria-label="Page 1" aria-current="page">1 </a> </li>
        <li> <a href="/search/?searchtype=author&amp;query=Li%2C+H&amp;start=50" class="pagination-link " aria-label="Goto page 2">2 </a> </li>
        <li> <a href="/search/?searchtype=author&amp;query=Li%2C+H&amp;start=100" class="pagination-link " aria-label="Goto page 3">3 </a> </li>
        <li> <a href="/search/?searchtype=author&amp;query=Li%2C+H&amp;start=150" class="pagination-link " aria-label="Goto page 4">4 </a> </li>
        <li> <a href="/search/?searchtype=author&amp;query=Li%2C+H&amp;start=200" class="pagination-link " aria-label="Goto page 5">5 </a> </li>
        <li><span class="pagination-ellipsis">&hellip;</span></li>
      </ul>
    </nav>
    <!-- Search results, one list item per paper. -->
    <ol class="breathe-horizontal" start="1">
      <li class="arxiv-result">
        <div class="is-marginless">
          <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.17434">arXiv:2502.17434</a> <span>&nbsp;[<a
href="https://arxiv.org/pdf/2502.17434">pdf</a>, <a href="https://arxiv.org/format/2502.17434">other</a>]&nbsp;</span>
          </p>
          <div class="tags is-inline-block">
            <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span>
            <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span>
            <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span>
          </div>
        </div>
        <p class="title is-5 mathjax"> V-HOP: Visuo-Haptic 6D Object Pose Tracking </p>
        <p class="authors">
          <span class="search-hit">Authors:</span>
          <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Hongyu Li</a>,
          <a href="/search/cs?searchtype=author&amp;query=Jia%2C+M">Mingxi Jia</a>,
          <a href="/search/cs?searchtype=author&amp;query=Akbulut%2C+T">Tuluhan Akbulut</a>,
          <a href="/search/cs?searchtype=author&amp;query=Xiang%2C+Y">Yu Xiang</a>,
          <a href="/search/cs?searchtype=author&amp;query=Konidaris%2C+G">George Konidaris</a>,
          <a href="/search/cs?searchtype=author&amp;query=Sridhar%2C+S">Srinath Sridhar</a>
        </p>
        <p class="abstract mathjax">
          <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>:
          <span class="abstract-short has-text-grey-dark mathjax" id="2502.17434v1-abstract-short" style="display: inline;">
            Humans naturally integrate vision and haptics for robust object perception during manipulation. The loss of either modality significantly degrades performance. Inspired by this multisensory integration, prior object pose estimation research has attempted to combine visual and haptic/tactile feedback.
            Although these works demonstrate improvements in controlled environments or synthetic datasets, th&hellip;
            <!-- The More/Less toggles are href-less anchors driven by onclick; role, tabindex and onkeydown expose them as buttons that can be focused and activated with Enter or Space. -->
            <a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" onclick="document.getElementById('2502.17434v1-abstract-full').style.display = 'inline'; document.getElementById('2502.17434v1-abstract-short').style.display = 'none';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">&#9661; More</a>
          </span>
          <span class="abstract-full has-text-grey-dark mathjax" id="2502.17434v1-abstract-full" style="display: none;">
            Humans naturally integrate vision and haptics for robust object perception during manipulation. The loss of either modality significantly degrades performance. Inspired by this multisensory integration, prior object pose estimation research has attempted to combine visual and haptic/tactile feedback. Although these works demonstrate improvements in controlled environments or synthetic datasets, they often underperform vision-only approaches in real-world settings due to poor generalization across diverse grippers, sensor layouts, or sim-to-real environments. Furthermore, they typically estimate the object pose for each frame independently, resulting in less coherent tracking over sequences in real-world deployments. To address these limitations, we introduce a novel unified haptic representation that effectively handles multiple gripper embodiments. Building on this representation, we introduce a new visuo-haptic transformer-based object pose tracker that seamlessly integrates visual and haptic input. We validate our framework in our dataset and the Feelsight dataset, demonstrating significant performance improvement on challenging sequences. Notably, our method achieves superior generalization and robustness across novel embodiments, objects, and sensor types (both taxel-based and vision-based tactile sensors). In real-world experiments, we demonstrate that our approach outperforms state-of-the-art visual trackers by a large margin.
            We further show that we can achieve precise manipulation tasks by incorporating our real-time object tracking result into motion plans, underscoring the advantages of visuo-haptic perception. Our model and dataset will be made open source upon acceptance of the paper. Project website: https://lhy.xyz/projects/v-hop/
            <a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" onclick="document.getElementById('2502.17434v1-abstract-full').style.display = 'none'; document.getElementById('2502.17434v1-abstract-short').style.display = 'inline';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">&#9651; Less</a>
          </span>
        </p>
        <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p>
      </li>
      <li class="arxiv-result">
        <div class="is-marginless">
          <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.17041">arXiv:2502.17041</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.17041">pdf</a>, <a href="https://arxiv.org/format/2502.17041">other</a>]&nbsp;</span> </p>
          <div class="tags is-inline-block">
            <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span>
          </div>
        </div>
        <p class="title is-5 mathjax"> PrivaCI-Bench: Evaluating Privacy with Contextual Integrity and Legal Compliance </p>
        <p class="authors">
          <span class="search-hit">Authors:</span>
          <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Haoran Li</a>,
          <a href="/search/cs?searchtype=author&amp;query=Hu%2C+W">Wenbin Hu</a>,
          <a href="/search/cs?searchtype=author&amp;query=Jing%2C+H">Huihao Jing</a>,
          <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Y">Yulin Chen</a>,
          <a href="/search/cs?searchtype=author&amp;query=Hu%2C+Q">Qi Hu</a>,
          <a href="/search/cs?searchtype=author&amp;query=Han%2C+S">Sirui Han</a>,
          <a href="/search/cs?searchtype=author&amp;query=Chu%2C+T">Tianshu Chu</a>,
          <a
href="/search/cs?searchtype=author&amp;query=Hu%2C+P">Peizhao Hu</a>,
          <a href="/search/cs?searchtype=author&amp;query=Song%2C+Y">Yangqiu Song</a>
        </p>
        <p class="abstract mathjax">
          <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>:
          <span class="abstract-short has-text-grey-dark mathjax" id="2502.17041v1-abstract-short" style="display: inline;">
            Recent advancements in generative large language models (LLMs) have enabled wider applicability, accessibility, and flexibility. However, their reliability and trustworthiness are still in doubt, especially for concerns regarding individuals&#39; data privacy. Great efforts have been made on privacy by building various evaluation benchmarks to study LLMs&#39; privacy awareness and robustness from their ge&hellip;
            <!-- The More/Less toggles are href-less anchors driven by onclick; role, tabindex and onkeydown expose them as buttons that can be focused and activated with Enter or Space. -->
            <a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" onclick="document.getElementById('2502.17041v1-abstract-full').style.display = 'inline'; document.getElementById('2502.17041v1-abstract-short').style.display = 'none';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">&#9661; More</a>
          </span>
          <span class="abstract-full has-text-grey-dark mathjax" id="2502.17041v1-abstract-full" style="display: none;">
            Recent advancements in generative large language models (LLMs) have enabled wider applicability, accessibility, and flexibility. However, their reliability and trustworthiness are still in doubt, especially for concerns regarding individuals&#39; data privacy. Great efforts have been made on privacy by building various evaluation benchmarks to study LLMs&#39; privacy awareness and robustness from their generated outputs to their hidden representations. Unfortunately, most of these works adopt a narrow formulation of privacy and only investigate personally identifiable information (PII). In this paper, we follow the merit of the Contextual Integrity (CI) theory, which posits that privacy evaluation should not only cover the transmitted attributes but also encompass the whole relevant social context through private information flows.
            We present PrivaCI-Bench, a comprehensive contextual privacy evaluation benchmark targeted at legal compliance to cover well-annotated privacy and safety regulations, real court cases, privacy policies, and synthetic data built from the official toolkit to study LLMs&#39; privacy and safety compliance. We evaluate the latest LLMs, including the recent reasoner models QwQ-32B and Deepseek R1. Our experimental results suggest that though LLMs can effectively capture key CI parameters inside a given context, they still require further advancements for privacy compliance.
            <a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" onclick="document.getElementById('2502.17041v1-abstract-full').style.display = 'none'; document.getElementById('2502.17041v1-abstract-short').style.display = 'inline';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">&#9651; Less</a>
          </span>
        </p>
        <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025.
</p>
        <p class="comments is-size-7">
          <span class="has-text-black-bis has-text-weight-semibold">Comments:</span>
          <span class="has-text-grey-dark mathjax">Project Webpage: https://hkust-knowcomp.github.io/privacy/</span>
        </p>
      </li>
      <li class="arxiv-result">
        <div class="is-marginless">
          <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.16776">arXiv:2502.16776</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.16776">pdf</a>, <a href="https://arxiv.org/format/2502.16776">other</a>]&nbsp;</span> </p>
          <div class="tags is-inline-block">
            <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span>
            <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span>
          </div>
        </div>
        <p class="title is-5 mathjax"> AISafetyLab: A Comprehensive Framework for AI Safety Evaluation and Improvement </p>
        <p class="authors">
          <span class="search-hit">Authors:</span>
          <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Z">Zhexin Zhang</a>,
          <a href="/search/cs?searchtype=author&amp;query=Lei%2C+L">Leqi Lei</a>,
          <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Junxiao Yang</a>,
          <a href="/search/cs?searchtype=author&amp;query=Huang%2C+X">Xijie Huang</a>,
          <a href="/search/cs?searchtype=author&amp;query=Lu%2C+Y">Yida Lu</a>,
          <a href="/search/cs?searchtype=author&amp;query=Cui%2C+S">Shiyao Cui</a>,
          <a href="/search/cs?searchtype=author&amp;query=Chen%2C+R">Renmiao Chen</a>,
          <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Q">Qinglin Zhang</a>,
          <a href="/search/cs?searchtype=author&amp;query=Wang%2C+X">Xinyuan Wang</a>,
          <a href="/search/cs?searchtype=author&amp;query=Wang%2C+H">Hao Wang</a>,
          <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Hao Li</a>,
          <a href="/search/cs?searchtype=author&amp;query=Lei%2C+X">Xianqi Lei</a>,
          <a href="/search/cs?searchtype=author&amp;query=Pan%2C+C">Chengwei Pan</a>,
          <a href="/search/cs?searchtype=author&amp;query=Sha%2C+L">Lei Sha</a>,
          <a href="/search/cs?searchtype=author&amp;query=Wang%2C+H">Hongning Wang</a>,
          <a href="/search/cs?searchtype=author&amp;query=Huang%2C+M">Minlie Huang</a>
        </p>
        <p class="abstract mathjax">
          <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>:
          <span class="abstract-short has-text-grey-dark mathjax" id="2502.16776v1-abstract-short" style="display: inline;">
            As AI models are increasingly deployed across diverse real-world scenarios, ensuring their safety remains a critical yet underexplored challenge. While substantial efforts have been made to evaluate and enhance AI safety, the lack of a standardized framework and comprehensive toolkit poses significant obstacles to systematic research and practical adoption. To bridge this gap, we introduce AISafet&hellip;
            <!-- The More/Less toggles are href-less anchors driven by onclick; role, tabindex and onkeydown expose them as buttons that can be focused and activated with Enter or Space. -->
            <a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" onclick="document.getElementById('2502.16776v1-abstract-full').style.display = 'inline'; document.getElementById('2502.16776v1-abstract-short').style.display = 'none';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">&#9661; More</a>
          </span>
          <span class="abstract-full has-text-grey-dark mathjax" id="2502.16776v1-abstract-full" style="display: none;">
            As AI models are increasingly deployed across diverse real-world scenarios, ensuring their safety remains a critical yet underexplored challenge. While substantial efforts have been made to evaluate and enhance AI safety, the lack of a standardized framework and comprehensive toolkit poses significant obstacles to systematic research and practical adoption. To bridge this gap, we introduce AISafetyLab, a unified framework and toolkit that integrates representative attack, defense, and evaluation methodologies for AI safety. AISafetyLab features an intuitive interface that enables developers to seamlessly apply various techniques while maintaining a well-structured and extensible codebase for future advancements.
            Additionally, we conduct empirical studies on Vicuna, analyzing different attack and defense strategies to provide valuable insights into their comparative effectiveness. To facilitate ongoing research and development in AI safety, AISafetyLab is publicly available at https://github.com/thu-coai/AISafetyLab, and we are committed to its continuous maintenance and improvement.
            <a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" onclick="document.getElementById('2502.16776v1-abstract-full').style.display = 'none'; document.getElementById('2502.16776v1-abstract-short').style.display = 'inline';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">&#9651; Less</a>
          </span>
        </p>
        <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p>
        <p class="comments is-size-7">
          <span class="has-text-black-bis has-text-weight-semibold">Comments:</span>
          <span class="has-text-grey-dark mathjax">13 pages</span>
        </p>
      </li>
      <li class="arxiv-result">
        <div class="is-marginless">
          <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.16734">arXiv:2502.16734</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.16734">pdf</a>, <a href="https://arxiv.org/format/2502.16734">other</a>]&nbsp;</span> </p>
          <div class="tags is-inline-block">
            <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span>
          </div>
        </div>
        <p class="title is-5 mathjax"> Towards Optimal Adversarial Robust Reinforcement Learning with Infinity Measurement Error </p>
        <p class="authors">
          <span class="search-hit">Authors:</span>
          <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Haoran Li</a>,
          <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Z">Zicheng Zhang</a>,
          <a href="/search/cs?searchtype=author&amp;query=Luo%2C+W">Wang Luo</a>,
          <a href="/search/cs?searchtype=author&amp;query=Han%2C+C">Congying Han</a>,
          <a
href="/search/cs?searchtype=author&amp;query=Lv%2C+J">Jiayu Lv</a>,
          <a href="/search/cs?searchtype=author&amp;query=Guo%2C+T">Tiande Guo</a>,
          <a href="/search/cs?searchtype=author&amp;query=Hu%2C+Y">Yudong Hu</a>
        </p>
        <p class="abstract mathjax">
          <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>:
          <span class="abstract-short has-text-grey-dark mathjax" id="2502.16734v1-abstract-short" style="display: inline;">
            Ensuring the robustness of deep reinforcement learning (DRL) agents against adversarial attacks is critical for their trustworthy deployment. Recent research highlights the challenges of achieving state-adversarial robustness and suggests that an optimal robust policy (ORP) does not always exist, complicating the enforcement of strict robustness constraints. In this paper, we further explore the c&hellip;
            <!-- The More/Less toggles are href-less anchors driven by onclick; role, tabindex and onkeydown expose them as buttons that can be focused and activated with Enter or Space. -->
            <a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" onclick="document.getElementById('2502.16734v1-abstract-full').style.display = 'inline'; document.getElementById('2502.16734v1-abstract-short').style.display = 'none';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">&#9661; More</a>
          </span>
          <span class="abstract-full has-text-grey-dark mathjax" id="2502.16734v1-abstract-full" style="display: none;">
            Ensuring the robustness of deep reinforcement learning (DRL) agents against adversarial attacks is critical for their trustworthy deployment. Recent research highlights the challenges of achieving state-adversarial robustness and suggests that an optimal robust policy (ORP) does not always exist, complicating the enforcement of strict robustness constraints. In this paper, we further explore the concept of ORP. We first introduce the Intrinsic State-adversarial Markov Decision Process (ISA-MDP), a novel formulation where adversaries cannot fundamentally alter the intrinsic nature of state observations. ISA-MDP, supported by empirical and theoretical evidence, universally characterizes decision-making under state-adversarial paradigms.
            We rigorously prove that within ISA-MDP, a deterministic and stationary ORP exists, aligning with the Bellman optimal policy. Our findings theoretically reveal that improving DRL robustness does not necessarily compromise performance in natural environments. Furthermore, we demonstrate the necessity of infinity measurement error (IME) in both $Q$-function and probability spaces to achieve ORP, unveiling vulnerabilities of previous DRL algorithms that rely on $1$-measurement errors. Motivated by these insights, we develop the Consistent Adversarial Robust Reinforcement Learning (CAR-RL) framework, which optimizes surrogates of IME. We apply CAR-RL to both value-based and policy-based DRL algorithms, achieving superior performance and validating our theoretical analysis.
            <a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" onclick="document.getElementById('2502.16734v1-abstract-full').style.display = 'none'; document.getElementById('2502.16734v1-abstract-short').style.display = 'inline';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">&#9651; Less</a>
          </span>
        </p>
        <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025.
</p>
        <p class="comments is-size-7">
          <span class="has-text-black-bis has-text-weight-semibold">Comments:</span>
          <span class="has-text-grey-dark mathjax">arXiv admin note: substantial text overlap with arXiv:2402.02165</span>
        </p>
      </li>
      <li class="arxiv-result">
        <div class="is-marginless">
          <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.16671">arXiv:2502.16671</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.16671">pdf</a>, <a href="https://arxiv.org/format/2502.16671">other</a>]&nbsp;</span> </p>
          <div class="tags is-inline-block">
            <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span>
            <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span>
            <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span>
          </div>
        </div>
        <p class="title is-5 mathjax"> MimeQA: Towards Socially-Intelligent Nonverbal Foundation Models </p>
        <p class="authors">
          <span class="search-hit">Authors:</span>
          <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Hengzhi Li</a>,
          <a href="/search/cs?searchtype=author&amp;query=Tjandrasuwita%2C+M">Megan Tjandrasuwita</a>,
          <a href="/search/cs?searchtype=author&amp;query=Fung%2C+Y+R">Yi R. Fung</a>,
          <a href="/search/cs?searchtype=author&amp;query=Solar-Lezama%2C+A">Armando Solar-Lezama</a>,
          <a href="/search/cs?searchtype=author&amp;query=Liang%2C+P+P">Paul Pu Liang</a>
        </p>
        <p class="abstract mathjax">
          <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>:
          <span class="abstract-short has-text-grey-dark mathjax" id="2502.16671v1-abstract-short" style="display: inline;">
            Socially intelligent AI that can understand and interact seamlessly with humans in daily lives is increasingly important as AI becomes more closely integrated with peoples&#39; daily activities.
            However, current works in artificial social reasoning all rely on language-only, or language-dominant approaches to benchmark and training models, resulting in systems that are improving in verbal communicatio&hellip;
            <!-- The More/Less toggles are href-less anchors driven by onclick; role, tabindex and onkeydown expose them as buttons that can be focused and activated with Enter or Space. -->
            <a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" onclick="document.getElementById('2502.16671v1-abstract-full').style.display = 'inline'; document.getElementById('2502.16671v1-abstract-short').style.display = 'none';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">&#9661; More</a>
          </span>
          <span class="abstract-full has-text-grey-dark mathjax" id="2502.16671v1-abstract-full" style="display: none;">
            Socially intelligent AI that can understand and interact seamlessly with humans in daily lives is increasingly important as AI becomes more closely integrated with peoples&#39; daily activities. However, current works in artificial social reasoning all rely on language-only, or language-dominant approaches to benchmark and training models, resulting in systems that are improving in verbal communication but struggle with nonverbal social understanding. To address this limitation, we tap into a novel source of data rich in nonverbal and social interactions -- mime videos. Mimes refer to the art of expression through gesture and movement without spoken words, which presents unique challenges and opportunities in interpreting non-verbal social communication. We contribute a new dataset called MimeQA, obtained by sourcing 221 videos from YouTube, through rigorous annotation and verification, resulting in a benchmark with 101 videos and 806 question-answer pairs. Using MimeQA, we evaluate state-of-the-art video large language models (vLLMs) and find that their overall accuracy ranges from 15-30%. Our analysis reveals that vLLMs often fail to ground imagined objects and over-rely on the text prompt while ignoring subtle nonverbal interactions.
            Our data resources are released at https://github.com/MIT-MI/MimeQA to inspire future work in foundation models that embody true social intelligence capable of interpreting non-verbal human interactions.
            <a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" onclick="document.getElementById('2502.16671v1-abstract-full').style.display = 'none'; document.getElementById('2502.16671v1-abstract-short').style.display = 'inline';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">&#9651; Less</a>
          </span>
        </p>
        <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p>
      </li>
      <li class="arxiv-result">
        <div class="is-marginless">
          <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.16580">arXiv:2502.16580</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.16580">pdf</a>, <a href="https://arxiv.org/format/2502.16580">other</a>]&nbsp;</span> </p>
          <div class="tags is-inline-block">
            <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span>
          </div>
        </div>
        <p class="title is-5 mathjax"> Can Indirect Prompt Injection Attacks Be Detected and Removed?
</p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Y">Yulin Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Haoran Li</a>, <a href="/search/cs?searchtype=author&amp;query=Sui%2C+Y">Yuan Sui</a>, <a href="/search/cs?searchtype=author&amp;query=He%2C+Y">Yufei He</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Y">Yue Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+Y">Yangqiu Song</a>, <a href="/search/cs?searchtype=author&amp;query=Hooi%2C+B">Bryan Hooi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.16580v1-abstract-short" style="display: inline;"> Prompt injection attacks manipulate large language models (LLMs) by misleading them to deviate from the original input instructions and execute maliciously injected instructions, because of their instruction-following capabilities and inability to distinguish between the original input instructions and maliciously injected instructions. To defend against such attacks, recent studies have developed&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.16580v1-abstract-full').style.display = 'inline'; document.getElementById('2502.16580v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.16580v1-abstract-full" style="display: none;"> Prompt injection attacks manipulate large language models (LLMs) by misleading them to deviate from the original input instructions and execute maliciously injected instructions, because of their instruction-following capabilities and inability to distinguish between the original input instructions and maliciously injected instructions. To defend against such attacks, recent studies have developed various detection mechanisms. 
While significant efforts have focused on detecting direct prompt injection attacks, where injected instructions are directly from the attacker who is also the user, limited attention has been given to indirect prompt injection attacks, where injected instructions are indirectly from external tools, such as a search engine. Moreover, current works mainly investigate injection detection methods and pay less attention to the post-processing method that aims to mitigate the injection after detection. In this paper, we investigate the feasibility of detecting and removing indirect prompt injection attacks, and we construct a benchmark dataset for evaluation. For detection, we assess the performance of existing LLMs and open-source detection models, and we further train detection models using our crafted training datasets. For removal, we evaluate two intuitive methods: (1) the segmentation removal method, which segments the injected document and removes parts containing injected instructions, and (2) the extraction removal method, which trains an extraction model to identify and remove injected instructions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.16580v1-abstract-full').style.display = 'none'; document.getElementById('2502.16580v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages, 6 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.16240">arXiv:2502.16240</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.16240">pdf</a>, <a href="https://arxiv.org/format/2502.16240">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> Speech Enhancement Using Continuous Embeddings of Neural Audio Codec </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Haoyang Li</a>, <a href="/search/cs?searchtype=author&amp;query=Yip%2C+J+Q">Jia Qi Yip</a>, <a href="/search/cs?searchtype=author&amp;query=Fan%2C+T">Tianyu Fan</a>, <a href="/search/cs?searchtype=author&amp;query=Chng%2C+E+S">Eng Siong Chng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.16240v1-abstract-short" style="display: inline;"> Recent advancements in Neural Audio Codec (NAC) models have inspired their use in various speech processing tasks, including speech enhancement (SE). In this work, we propose a novel, efficient SE approach by leveraging the pre-quantization output of a pretrained NAC encoder. 
Unlike prior NAC-based SE methods, which process discrete speech tokens using Language Models (LMs), we perform SE within t&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.16240v1-abstract-full').style.display = 'inline'; document.getElementById('2502.16240v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.16240v1-abstract-full" style="display: none;"> Recent advancements in Neural Audio Codec (NAC) models have inspired their use in various speech processing tasks, including speech enhancement (SE). In this work, we propose a novel, efficient SE approach by leveraging the pre-quantization output of a pretrained NAC encoder. Unlike prior NAC-based SE methods, which process discrete speech tokens using Language Models (LMs), we perform SE within the continuous embedding space of the pretrained NAC, which is highly compressed along the time dimension for efficient representation. Our lightweight SE model, optimized through an embedding-level loss, delivers results comparable to SE baselines trained on larger datasets, with a significantly lower real-time factor of 0.005. Additionally, our method achieves a low GMAC of 3.94, reducing complexity 18-fold compared to Sepformer in a simulated cloud-based audio transmission environment. This work highlights a new, efficient NAC-based SE solution, particularly suitable for cloud applications where NAC is used to compress audio before transmission. Copyright 20XX IEEE. Personal use of this material is permitted. Permission from IEEE must be obtained for all other uses, in any current or future media, including reprinting/republishing this material for advertising or promotional purposes, creating new collective works, for resale or redistribution to servers or lists, or reuse of any copyrighted component of this work in other works. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.16240v1-abstract-full').style.display = 'none'; document.getElementById('2502.16240v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to ICASSP 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.16190">arXiv:2502.16190</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.16190">pdf</a>, <a href="https://arxiv.org/format/2502.16190">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Databases">cs.DB</span> </div> </div> <p class="title is-5 mathjax"> AdaNDV: Adaptive Number of Distinct Value Estimation via Learning to Select and Fuse Estimators </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Xu%2C+X">Xianghong Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+T">Tieying Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=He%2C+X">Xiao He</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Haoyang Li</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+R">Rong Kang</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+S">Shuai Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+L">Linhui Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Liang%2C+Z">Zhimin Liang</a>, <a href="/search/cs?searchtype=author&amp;query=Luo%2C+S">Shangyu Luo</a>, <a 
href="/search/cs?searchtype=author&amp;query=Zhang%2C+L">Lei Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+J">Jianjun Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.16190v1-abstract-short" style="display: inline;"> Estimating the Number of Distinct Values (NDV) is fundamental for numerous data management tasks, especially within database applications. However, most existing works primarily focus on introducing new statistical or learned estimators, while identifying the most suitable estimator for a given scenario remains largely unexplored. Therefore, we propose AdaNDV, a learned method designed to adaptive&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.16190v1-abstract-full').style.display = 'inline'; document.getElementById('2502.16190v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.16190v1-abstract-full" style="display: none;"> Estimating the Number of Distinct Values (NDV) is fundamental for numerous data management tasks, especially within database applications. However, most existing works primarily focus on introducing new statistical or learned estimators, while identifying the most suitable estimator for a given scenario remains largely unexplored. Therefore, we propose AdaNDV, a learned method designed to adaptively select and fuse existing estimators to address this issue. Specifically, (1) we propose to use learned models to distinguish between overestimated and underestimated estimators and then select appropriate estimators from each category. This strategy provides a complementary perspective by integrating overestimations and underestimations for error correction, thereby improving the accuracy of NDV estimation. 
(2) To further integrate the estimation results, we introduce a novel fusion approach that employs a learned model to predict the weights of the selected estimators and then applies a weighted sum to merge them. By combining these strategies, the proposed AdaNDV fundamentally distinguishes itself from previous works that directly estimate NDV. Moreover, extensive experiments conducted on real-world datasets, with the number of individual columns being several orders of magnitude larger than in previous studies, demonstrate the superior performance of our method. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.16190v1-abstract-full').style.display = 'none'; document.getElementById('2502.16190v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by VLDB 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.16153">arXiv:2502.16153</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.16153">pdf</a>, <a href="https://arxiv.org/format/2502.16153">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Understanding Screenwriters&#39; Practices, Attitudes, and Future Expectations in Human-AI Co-Creation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Tang%2C+Y">Yuying Tang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Haotian Li</a>, <a href="/search/cs?searchtype=author&amp;query=Lan%2C+M">Minghe Lan</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+X">Xiaojuan Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Qu%2C+H">Huamin Qu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.16153v1-abstract-short" style="display: inline;"> With the rise of AI technologies and their growing influence in the screenwriting field, understanding the opportunities and concerns related to AI&#39;s role in screenwriting is essential for enhancing human-AI co-creation. Through semi-structured interviews with 23 screenwriters, we explored their creative practices, attitudes, and expectations in collaborating with AI for screenwriting. 
Based on pa&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.16153v1-abstract-full').style.display = 'inline'; document.getElementById('2502.16153v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.16153v1-abstract-full" style="display: none;"> With the rise of AI technologies and their growing influence in the screenwriting field, understanding the opportunities and concerns related to AI&#39;s role in screenwriting is essential for enhancing human-AI co-creation. Through semi-structured interviews with 23 screenwriters, we explored their creative practices, attitudes, and expectations in collaborating with AI for screenwriting. Based on participants&#39; responses, we identified the key stages in which they commonly integrated AI, including story structure &amp; plot development, screenplay text, goal &amp; idea generation, and dialogue. Then, we examined how different attitudes toward AI integration influence screenwriters&#39; practices across various workflow stages and their broader impact on the industry. Additionally, we categorized their expected assistance using four distinct roles of AI: actor, audience, expert, and executor. Our findings provide insights into AI&#39;s impact on screenwriting practices and offer suggestions on how AI can benefit the future of screenwriting. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.16153v1-abstract-full').style.display = 'none'; document.getElementById('2502.16153v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by CHI 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.16121">arXiv:2502.16121</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.16121">pdf</a>, <a href="https://arxiv.org/format/2502.16121">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> From Target Tracking to Targeting Track -- Part II: Regularized Polynomial Trajectory Optimization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+T">Tiancheng Li</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+Y">Yan Song</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+G">Guchong Li</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Hao Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.16121v1-abstract-short" style="display: inline;"> Target tracking entails the estimation of the evolution of the target state over time, namely the target trajectory. 
Different from the classical state space model, our series of studies, including this paper, model the collection of the target state as a stochastic process (SP) that is further decomposed into a deterministic part which represents the trend of the trajectory and a residual SP repr&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.16121v1-abstract-full').style.display = 'inline'; document.getElementById('2502.16121v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.16121v1-abstract-full" style="display: none;"> Target tracking entails the estimation of the evolution of the target state over time, namely the target trajectory. Different from the classical state space model, our series of studies, including this paper, model the collection of the target state as a stochastic process (SP) that is further decomposed into a deterministic part which represents the trend of the trajectory and a residual SP representing the residual fitting error. Subsequently, the tracking problem is formulated as a learning problem regarding the trajectory SP for which a key part is to estimate a trajectory FoT (T-FoT) best fitting the measurements in time series. For this purpose, we consider the polynomial T-FoT and address the regularized polynomial T-FoT optimization employing two distinct regularization strategies seeking trade-off between the accuracy and simplicity. One limits the order of the polynomial and then the best choice is determined by grid searching in a narrow, bounded range while the other adopts $\ell_0$ norm regularization for which the hybrid Newton solver is employed. Simulation results obtained in both single and multiple maneuvering target scenarios demonstrate the effectiveness of our approaches. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.16121v1-abstract-full').style.display = 'none'; document.getElementById('2502.16121v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Part II of a series of companion papers; 11 pages, 10 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.16114">arXiv:2502.16114</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.16114">pdf</a>, <a href="https://arxiv.org/format/2502.16114">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> InterLink: Linking Text with Code and Output in Computational Notebooks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lin%2C+Y">Yanna Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+L">Leni Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Haotian Li</a>, <a href="/search/cs?searchtype=author&amp;query=Qu%2C+H">Huamin Qu</a>, <a href="/search/cs?searchtype=author&amp;query=Moritz%2C+D">Dominik Moritz</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.16114v1-abstract-short" style="display: inline;"> Computational notebooks, widely used for ad-hoc analysis and often shared with 
others, can be difficult to understand because the standard linear layout is not optimized for reading. In particular, related text, code, and outputs may be spread across the UI making it difficult to draw connections. In response, we introduce InterLink, a plugin designed to present the relationships between text, cod&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.16114v1-abstract-full').style.display = 'inline'; document.getElementById('2502.16114v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.16114v1-abstract-full" style="display: none;"> Computational notebooks, widely used for ad-hoc analysis and often shared with others, can be difficult to understand because the standard linear layout is not optimized for reading. In particular, related text, code, and outputs may be spread across the UI making it difficult to draw connections. In response, we introduce InterLink, a plugin designed to present the relationships between text, code, and outputs, thereby making notebooks easier to understand. In a formative study, we identify pain points and derive design requirements for identifying and navigating relationships among various pieces of information within notebooks. Based on these requirements, InterLink features a new layout that separates text from code and outputs into two columns. It uses visual links to signal relationships between text and associated code and outputs and offers interactions for navigating related pieces of information. In a user study with 12 participants, those using InterLink were 13.6% more accurate at finding and integrating information from complex analyses in computational notebooks. These results show the potential of notebook layouts that make them easier to understand. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.16114v1-abstract-full').style.display = 'none'; document.getElementById('2502.16114v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at CHI Conference on Human Factors in Computing Systems (CHI&#39;25), April 26-May 1, 2025, Yokohama, Japan</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.16033">arXiv:2502.16033</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.16033">pdf</a>, <a href="https://arxiv.org/format/2502.16033">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Multimodal Inconsistency Reasoning (MMIR): A New Benchmark for Multimodal Reasoning Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yan%2C+Q">Qianqi Yan</a>, <a href="/search/cs?searchtype=author&amp;query=Fan%2C+Y">Yue Fan</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Hongquan Li</a>, <a href="/search/cs?searchtype=author&amp;query=Jiang%2C+S">Shan Jiang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+Y">Yang Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Guan%2C+X">Xinze Guan</a>, <a href="/search/cs?searchtype=author&amp;query=Kuo%2C+C">Ching-Chen Kuo</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+X+E">Xin Eric Wang</a> 
</p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.16033v1-abstract-short" style="display: inline;"> Existing Multimodal Large Language Models (MLLMs) are predominantly trained and tested on consistent visual-textual inputs, leaving open the question of whether they can handle inconsistencies in real-world, layout-rich content. To bridge this gap, we propose the Multimodal Inconsistency Reasoning (MMIR) benchmark to assess MLLMs&#39; ability to detect and reason about semantic mismatches in artifacts&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.16033v1-abstract-full').style.display = 'inline'; document.getElementById('2502.16033v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.16033v1-abstract-full" style="display: none;"> Existing Multimodal Large Language Models (MLLMs) are predominantly trained and tested on consistent visual-textual inputs, leaving open the question of whether they can handle inconsistencies in real-world, layout-rich content. To bridge this gap, we propose the Multimodal Inconsistency Reasoning (MMIR) benchmark to assess MLLMs&#39; ability to detect and reason about semantic mismatches in artifacts such as webpages, presentation slides, and posters. MMIR comprises 534 challenging samples, each containing synthetically injected errors across five reasoning-heavy categories: Factual Contradiction, Identity Misattribution, Contextual Mismatch, Quantitative Discrepancy, and Temporal/Spatial Incoherence. We evaluate six state-of-the-art MLLMs, showing that models with dedicated multimodal reasoning capabilities, such as o1, substantially outperform their counterparts while open-source models remain particularly vulnerable to inconsistency errors. 
Detailed error analyses further show that models excel in detecting inconsistencies confined to a single modality, particularly in text, but struggle with cross-modal conflicts and complex layouts. Probing experiments reveal that single-modality prompting, including Chain-of-Thought (CoT) and Set-of-Mark (SoM) methods, yields marginal gains, revealing a key bottleneck in cross-modal reasoning. Our findings highlight the need for advanced multimodal reasoning and point to future research on multimodal inconsistency. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.16033v1-abstract-full').style.display = 'none'; document.getElementById('2502.16033v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.15851">arXiv:2502.15851</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.15851">pdf</a>, <a href="https://arxiv.org/format/2502.15851">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Control Illusion: The Failure of Instruction Hierarchies in Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Geng%2C+Y">Yilin Geng</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Haonan Li</a>, <a href="/search/cs?searchtype=author&amp;query=Mu%2C+H">Honglin Mu</a>, <a 
href="/search/cs?searchtype=author&amp;query=Han%2C+X">Xudong Han</a>, <a href="/search/cs?searchtype=author&amp;query=Baldwin%2C+T">Timothy Baldwin</a>, <a href="/search/cs?searchtype=author&amp;query=Abend%2C+O">Omri Abend</a>, <a href="/search/cs?searchtype=author&amp;query=Hovy%2C+E">Eduard Hovy</a>, <a href="/search/cs?searchtype=author&amp;query=Frermann%2C+L">Lea Frermann</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.15851v1-abstract-short" style="display: inline;"> Large language models (LLMs) are increasingly deployed with hierarchical instruction schemes, where certain instructions (e.g., system-level directives) are expected to take precedence over others (e.g., user messages). Yet, we lack a systematic understanding of how effectively these hierarchical control mechanisms work. We introduce a systematic evaluation framework based on constraint prioritiza&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.15851v1-abstract-full').style.display = 'inline'; document.getElementById('2502.15851v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.15851v1-abstract-full" style="display: none;"> Large language models (LLMs) are increasingly deployed with hierarchical instruction schemes, where certain instructions (e.g., system-level directives) are expected to take precedence over others (e.g., user messages). Yet, we lack a systematic understanding of how effectively these hierarchical control mechanisms work. We introduce a systematic evaluation framework based on constraint prioritization to assess how well LLMs enforce instruction hierarchies. Our experiments across six state-of-the-art LLMs reveal that models struggle with consistent instruction prioritization, even for simple formatting conflicts. 
We find that the widely-adopted system/user prompt separation fails to establish a reliable instruction hierarchy, and models exhibit strong inherent biases toward certain constraint types regardless of their priority designation. While controlled prompt engineering and model fine-tuning show modest improvements, our results indicate that instruction hierarchy enforcement is not robustly realized, calling for deeper architectural innovations beyond surface-level modifications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.15851v1-abstract-full').style.display = 'none'; document.getElementById('2502.15851v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.15816">arXiv:2502.15816</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.15816">pdf</a>, <a href="https://arxiv.org/format/2502.15816">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> GenAI at the Edge: Comprehensive Survey on Empowering Edge Devices </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Navardi%2C+M">Mozhgan Navardi</a>, <a href="/search/cs?searchtype=author&amp;query=Aalishah%2C+R">Romina Aalishah</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+Y">Yuzhe Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Lin%2C+Y">Yueqian Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Hai Li</a>, <a 
href="/search/cs?searchtype=author&amp;query=Chen%2C+Y">Yiran Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Mohsenin%2C+T">Tinoosh Mohsenin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.15816v1-abstract-short" style="display: inline;"> Generative Artificial Intelligence (GenAI) applies models and algorithms such as Large Language Model (LLM) and Foundation Model (FM) to generate new data. GenAI, as a promising approach, enables advanced capabilities in various applications, including text generation and image processing. In current practice, GenAI algorithms run mainly on the cloud server, leading to high latency and raising sec&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.15816v1-abstract-full').style.display = 'inline'; document.getElementById('2502.15816v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.15816v1-abstract-full" style="display: none;"> Generative Artificial Intelligence (GenAI) applies models and algorithms such as Large Language Model (LLM) and Foundation Model (FM) to generate new data. GenAI, as a promising approach, enables advanced capabilities in various applications, including text generation and image processing. In current practice, GenAI algorithms run mainly on the cloud server, leading to high latency and raising security concerns. Consequently, these challenges encourage the deployment of GenAI algorithms directly on edge devices. However, the large size of such models and their significant computational resource requirements pose obstacles when deploying them in resource-constrained systems. This survey provides a comprehensive overview of recent proposed techniques that optimize GenAI for efficient deployment on resource-constrained edge devices. 
For this aim, this work highlights three main categories for bringing GenAI to the edge: software optimization, hardware optimization, and frameworks. The main takeaways for readers of this survey will be a clear roadmap to design, implement, and refine GenAI systems for real-world implementation on edge devices. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.15816v1-abstract-full').style.display = 'none'; document.getElementById('2502.15816v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">AAAI 2025 Spring Symposium Series (SSS), GenAI@Edge: Empowering Generative AI at the Edge Symposium</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.15811">arXiv:2502.15811</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.15811">pdf</a>, <a href="https://arxiv.org/format/2502.15811">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Spiking Point Transformer for Point Cloud Classification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wu%2C+P">Peixi Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Chai%2C+B">Bosong Chai</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Hebei Li</a>, <a 
href="/search/cs?searchtype=author&amp;query=Zheng%2C+M">Menghua Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Peng%2C+Y">Yansong Peng</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Z">Zeyu Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Nie%2C+X">Xuan Nie</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Y">Yueyi Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+X">Xiaoyan Sun</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.15811v1-abstract-short" style="display: inline;"> Spiking Neural Networks (SNNs) offer an attractive and energy-efficient alternative to conventional Artificial Neural Networks (ANNs) due to their sparse binary activation. When SNN meets Transformer, it shows great potential in 2D image processing. However, their application for 3D point cloud remains underexplored. To this end, we present Spiking Point Transformer (SPT), the first transformer-ba&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.15811v1-abstract-full').style.display = 'inline'; document.getElementById('2502.15811v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.15811v1-abstract-full" style="display: none;"> Spiking Neural Networks (SNNs) offer an attractive and energy-efficient alternative to conventional Artificial Neural Networks (ANNs) due to their sparse binary activation. When SNN meets Transformer, it shows great potential in 2D image processing. However, their application for 3D point cloud remains underexplored. To this end, we present Spiking Point Transformer (SPT), the first transformer-based SNN framework for point cloud classification. 
Specifically, we first design Queue-Driven Sampling Direct Encoding for point cloud to reduce computational costs while retaining the most effective support points at each time step. We introduce the Hybrid Dynamics Integrate-and-Fire Neuron (HD-IF), designed to simulate selective neuron activation and reduce over-reliance on specific artificial neurons. SPT attains state-of-the-art results on three benchmark datasets that span both real-world and synthetic datasets in the SNN domain. Meanwhile, the theoretical energy consumption of SPT is at least 6.4$\times$ less than its ANN counterpart. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.15811v1-abstract-full').style.display = 'none'; document.getElementById('2502.15811v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by AAAI 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.15798">arXiv:2502.15798</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.15798">pdf</a>, <a href="https://arxiv.org/format/2502.15798">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> MaxSup: Overcoming Representation Collapse in Label Smoothing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+Y">Yuxuan Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Heng Li</a>, <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+Z">Zhi-Qi Cheng</a>, <a href="/search/cs?searchtype=author&amp;query=Yan%2C+X">Xudong Yan</a>, <a href="/search/cs?searchtype=author&amp;query=Fritz%2C+M">Mario Fritz</a>, <a href="/search/cs?searchtype=author&amp;query=Keuper%2C+M">Margret Keuper</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.15798v1-abstract-short" style="display: inline;"> Label Smoothing (LS) is widely adopted to curb overconfidence in neural network predictions and enhance generalization. However, previous research shows that LS can force feature representations into excessively tight clusters, eroding intra-class distinctions. 
More recent findings suggest that LS also induces overconfidence in misclassifications, yet the precise mechanism remained unclear. In thi&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.15798v1-abstract-full').style.display = 'inline'; document.getElementById('2502.15798v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.15798v1-abstract-full" style="display: none;"> Label Smoothing (LS) is widely adopted to curb overconfidence in neural network predictions and enhance generalization. However, previous research shows that LS can force feature representations into excessively tight clusters, eroding intra-class distinctions. More recent findings suggest that LS also induces overconfidence in misclassifications, yet the precise mechanism remained unclear. In this work, we decompose the loss term introduced by LS, revealing two key components: (i) a regularization term that functions only when the prediction is correct, and (ii) an error-enhancement term that emerges under misclassifications. This latter term compels the model to reinforce incorrect predictions with exaggerated certainty, further collapsing the feature space. To address these issues, we propose Max Suppression (MaxSup), which uniformly applies the intended regularization to both correct and incorrect predictions by penalizing the top-1 logit instead of the ground-truth logit. Through feature analyses, we show that MaxSup restores intra-class variation and sharpens inter-class boundaries. Extensive experiments on image classification and downstream tasks confirm that MaxSup is a more robust alternative to LS. Code is available at: https://github.com/ZhouYuxuanYX/Maximum-Suppression-Regularization. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.15798v1-abstract-full').style.display = 'none'; document.getElementById('2502.15798v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">19 pages, 9 Tables, preliminary work under review do not distribute</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.15684">arXiv:2502.15684</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.15684">pdf</a>, <a href="https://arxiv.org/format/2502.15684">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> An Agent Framework for Real-Time Financial Information Searching with Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jinzheng Li</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+J">Jingshu Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Hongguang Li</a>, <a href="/search/cs?searchtype=author&amp;query=Shen%2C+Y">Yiqing Shen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.15684v1-abstract-short" style="display: inline;"> Financial 
decision-making requires processing vast amounts of real-time information while understanding their complex temporal relationships. While traditional search engines excel at providing real-time information access, they often struggle to comprehend sophisticated user intentions and contextual nuances. Conversely, Large Language Models (LLMs) demonstrate reasoning and interaction capabilit&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.15684v1-abstract-full').style.display = 'inline'; document.getElementById('2502.15684v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.15684v1-abstract-full" style="display: none;"> Financial decision-making requires processing vast amounts of real-time information while understanding their complex temporal relationships. While traditional search engines excel at providing real-time information access, they often struggle to comprehend sophisticated user intentions and contextual nuances. Conversely, Large Language Models (LLMs) demonstrate reasoning and interaction capabilities but may generate unreliable outputs without access to current data. While recent attempts have been made to combine LLMs with search capabilities, they suffer from (1) restricted access to specialized financial data, (2) static query structures that cannot adapt to dynamic market conditions, and (3) insufficient temporal awareness in result generation. To address these challenges, we present FinSearch, a novel agent-based search framework specifically designed for financial applications that interface with diverse financial data sources including market, stock, and news data. 
Innovatively, FinSearch comprises four components: (1) an LLM-based multi-step search pre-planner that decomposes user queries into structured sub-queries mapped to specific data sources through a graph representation; (2) a search executor with an LLM-based adaptive query rewriter that executes the searching of each sub-query while dynamically refining the sub-queries in its subsequent node based on intermediate search results; (3) a temporal weighting mechanism that prioritizes information relevance based on the deduced time context from the user&#39;s query; (4) an LLM-based response generator that synthesizes results into coherent, contextually appropriate outputs. To evaluate FinSearch, we construct FinSearchBench-24, a benchmark of 1,500 four-choice questions across the stock market, rate changes, monetary policy, and industry developments spanning from June to October 2024. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.15684v1-abstract-full').style.display = 'none'; document.getElementById('2502.15684v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.15652">arXiv:2502.15652</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.15652">pdf</a>, <a href="https://arxiv.org/format/2502.15652">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Empowering LLMs with Logical Reasoning: A Comprehensive Survey </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+F">Fengxiang Cheng</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Haoxuan Li</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+F">Fenrong Liu</a>, <a href="/search/cs?searchtype=author&amp;query=van+Rooij%2C+R">Robert van Rooij</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+K">Kun Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Lin%2C+Z">Zhouchen Lin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.15652v1-abstract-short" style="display: inline;"> Large language models (LLMs) have achieved remarkable successes on various natural language tasks. However, recent studies have found that there are still significant challenges to the logical reasoning abilities of LLMs. 
This paper summarizes and categorizes the main challenges into two aspects: (1) Logical question answering, LLMs often fail to generate the correct answer within complex logical&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.15652v1-abstract-full').style.display = 'inline'; document.getElementById('2502.15652v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.15652v1-abstract-full" style="display: none;"> Large language models (LLMs) have achieved remarkable successes on various natural language tasks. However, recent studies have found that there are still significant challenges to the logical reasoning abilities of LLMs. This paper summarizes and categorizes the main challenges into two aspects: (1) Logical question answering, LLMs often fail to generate the correct answer within complex logical problems which require sophisticated deductive, inductive or abductive reasoning given a collection of premises and constraints. (2) Logical consistency, LLMs are prone to producing responses contradicting themselves across different questions. For example, a state-of-the-art Macaw question-answering LLM answers Yes to both questions Is a magpie a bird? and Does a bird have wings? but answers No to Does a magpie have wings?. To facilitate this research direction, we comprehensively investigate the most cutting-edge methods and propose detailed taxonomies of these methods. Specifically, to accurately answer complex logic questions, previous methods can be categorized based on reliance on external solvers, prompts, pretraining, and fine-tuning. To avoid logical contradictions, we discuss concepts and solutions of various logical consistencies, including implication, negation, transitivity, factuality consistency, and their composites. 
In addition, we review commonly used benchmark datasets and evaluation metrics, and discuss promising research directions, such as extensions to modal logic to account for uncertainty, and efficient algorithms satisfying multiple logical consistencies simultaneously. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.15652v1-abstract-full').style.display = 'none'; document.getElementById('2502.15652v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.15470">arXiv:2502.15470</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.15470">pdf</a>, <a href="https://arxiv.org/format/2502.15470">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> PAPI: Exploiting Dynamic Parallelism in Large Language Model Decoding with a Processing-In-Memory-Enabled Computing System </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=He%2C+Y">Yintao He</a>, <a href="/search/cs?searchtype=author&amp;query=Mao%2C+H">Haiyu Mao</a>, <a href="/search/cs?searchtype=author&amp;query=Giannoula%2C+C">Christina Giannoula</a>, <a 
href="/search/cs?searchtype=author&amp;query=Sadrosadati%2C+M">Mohammad Sadrosadati</a>, <a href="/search/cs?searchtype=author&amp;query=G%C3%B3mez-Luna%2C+J">Juan Gómez-Luna</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Huawei Li</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+X">Xiaowei Li</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Ying Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Mutlu%2C+O">Onur Mutlu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.15470v1-abstract-short" style="display: inline;"> Large language models (LLMs) are widely used for natural language understanding and text generation. An LLM model relies on a time-consuming step called LLM decoding to generate output tokens. Several prior works focus on improving the performance of LLM decoding using parallelism techniques, such as batching and speculative decoding. State-of-the-art LLM decoding has both compute-bound and memory&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.15470v1-abstract-full').style.display = 'inline'; document.getElementById('2502.15470v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.15470v1-abstract-full" style="display: none;"> Large language models (LLMs) are widely used for natural language understanding and text generation. An LLM model relies on a time-consuming step called LLM decoding to generate output tokens. Several prior works focus on improving the performance of LLM decoding using parallelism techniques, such as batching and speculative decoding. State-of-the-art LLM decoding has both compute-bound and memory-bound kernels. 
Some prior works statically identify and map these different kernels to a heterogeneous architecture consisting of both processing-in-memory (PIM) units and computation-centric accelerators. We observe that characteristics of LLM decoding kernels (e.g., whether or not a kernel is memory-bound) can change dynamically due to parameter changes to meet user and/or system demands, making (1) static kernel mapping to PIM units and computation-centric accelerators suboptimal, and (2) one-size-fits-all approach of designing PIM units inefficient due to a large degree of heterogeneity even in memory-bound kernels. In this paper, we aim to accelerate LLM decoding while considering the dynamically changing characteristics of the kernels involved. We propose PAPI (PArallel Decoding with PIM), a PIM-enabled heterogeneous architecture that exploits dynamic scheduling of compute-bound or memory-bound kernels to suitable hardware units. PAPI has two key mechanisms: (1) online kernel characterization to dynamically schedule kernels to the most suitable hardware units at runtime and (2) a PIM-enabled heterogeneous computing system that harmoniously orchestrates both computation-centric processing units and hybrid PIM units with different computing capabilities. Our experimental results on three broadly-used LLMs show that PAPI achieves 1.8$\times$ and 11.1$\times$ speedups over a state-of-the-art heterogeneous LLM accelerator and a state-of-the-art PIM-only LLM accelerator, respectively. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.15470v1-abstract-full').style.display = 'none'; document.getElementById('2502.15470v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear in ASPLOS 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.15296">arXiv:2502.15296</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.15296">pdf</a>, <a href="https://arxiv.org/format/2502.15296">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Beyond Fixed Variables: Expanding-variate Time Series Forecasting via Flat Scheme and Spatio-temporal Focal Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Ma%2C+M">Minbo Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Tang%2C+K">Kai Tang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Huan Li</a>, <a href="/search/cs?searchtype=author&amp;query=Teng%2C+F">Fei Teng</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+D">Dalin Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+T">Tianrui Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.15296v1-abstract-short" style="display: inline;"> Multivariate Time Series Forecasting (MTSF) has long been a key research focus. Traditionally, these studies assume a fixed number of variables, but in real-world applications, Cyber-Physical Systems often expand as new sensors are deployed, increasing variables in MTSF. In light of this, we introduce a novel task, Expanding-variate Time Series Forecasting (EVTSF). 
This task presents unique challe&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.15296v1-abstract-full').style.display = 'inline'; document.getElementById('2502.15296v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.15296v1-abstract-full" style="display: none;"> Multivariate Time Series Forecasting (MTSF) has long been a key research focus. Traditionally, these studies assume a fixed number of variables, but in real-world applications, Cyber-Physical Systems often expand as new sensors are deployed, increasing variables in MTSF. In light of this, we introduce a novel task, Expanding-variate Time Series Forecasting (EVTSF). This task presents unique challenges, specifically (1) handling inconsistent data shapes caused by adding new variables, and (2) addressing imbalanced spatio-temporal learning, where expanding variables have limited observed data due to the necessity for timely operation. To address these challenges, we propose STEV, a flexible spatio-temporal forecasting framework. STEV includes a new Flat Scheme to tackle the inconsistent data shape issue, which extends the graph-based spatio-temporal modeling architecture into 1D space by flattening the 2D samples along the variable dimension, making the model variable-scale-agnostic while still preserving dynamic spatial correlations through a holistic graph. We introduce a novel Spatio-temporal Focal Learning strategy that incorporates a negative filter to resolve potential conflicts between contrastive learning and graph representation, and a focal contrastive loss as its core to guide the framework to focus on optimizing the expanding variables. We benchmark EVTSF performance using three real-world datasets and compare it against three potential solutions employing SOTA MTSF models tailored for EVTSF. 
Experimental results show that STEV significantly outperforms its competitors, particularly on expanding variables. Notably, STEV, with only 5% of observations from the expanding period, is on par with SOTA MTSF models trained with complete observations. Further exploration of various expanding strategies underscores the generalizability of STEV in real-world applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.15296v1-abstract-full').style.display = 'none'; document.getElementById('2502.15296v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.15142">arXiv:2502.15142</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.15142">pdf</a>, <a href="https://arxiv.org/format/2502.15142">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TSE.2023.3337421">10.1109/TSE.2023.3337421 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> AccessFixer: Enhancing GUI Accessibility for Low Vision Users With R-GCN Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+M">Mengxi Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+H">Huaxiao Liu</a>, <a 
href="/search/cs?searchtype=author&amp;query=Chen%2C+C">Chunyang Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Gao%2C+G">Guangyong Gao</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Han Li</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+J">Jian Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.15142v1-abstract-short" style="display: inline;"> The Graphical User Interface (GUI) plays a critical role in the interaction between users and mobile applications (apps), aiming at facilitating the operation process. However, due to the variety of functions and non-standardized design, GUIs might have many accessibility issues, like the size of components being too small or their intervals being narrow. These issues would hinder the operation of&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.15142v1-abstract-full').style.display = 'inline'; document.getElementById('2502.15142v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.15142v1-abstract-full" style="display: none;"> The Graphical User Interface (GUI) plays a critical role in the interaction between users and mobile applications (apps), aiming at facilitating the operation process. However, due to the variety of functions and non-standardized design, GUIs might have many accessibility issues, like the size of components being too small or their intervals being narrow. These issues would hinder the operation of low vision users, preventing them from obtaining information accurately and conveniently. Although several technologies and methods have been proposed to address these issues, they are typically confined to issue identification, leaving the resolution in the hands of developers. 
Moreover, it can be challenging to ensure that the color, size, and interval of the fixed GUIs are appropriately compared to the original ones. In this work, we propose a novel approach named AccessFixer, which utilizes the Relational-Graph Convolutional Neural Network (R-GCN) to simultaneously fix three kinds of accessibility issues, including small sizes, narrow intervals, and low color contrast in GUIs. With AccessFixer, the fixed GUIs would have a consistent color palette, uniform intervals, and adequate size changes achieved through coordinated adjustments to the attributes of related components. Our experiments demonstrate the effectiveness and usefulness of AccessFixer in fixing GUI accessibility issues. After fixing 30 real-world apps, our approach solves an average of 81.2% of their accessibility issues. Also, we apply AccessFixer to 10 open-source apps by submitting the fixed results with pull requests (PRs) on GitHub. The results demonstrate that developers approve of our submitted fixed GUIs, with 8 PRs being merged or under fixing. A user study examines that low vision users host a positive attitude toward the GUIs fixed by our method. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.15142v1-abstract-full').style.display = 'none'; document.getElementById('2502.15142v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">18 pages, 14 figures, has been published in IEEE Transactions on Software Engineering</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE Transactions on Software Engineering, 2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.14938">arXiv:2502.14938</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.14938">pdf</a>, <a href="https://arxiv.org/format/2502.14938">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> GS-Cache: A GS-Cache Inference Framework for Large-scale Gaussian Splatting Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Tao%2C+M">Miao Tao</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+Y">Yuanzhen Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+H">Haoran Xu</a>, <a href="/search/cs?searchtype=author&amp;query=He%2C+Z">Zeyu He</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+Z">Zhenyu Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Y">Yuchang Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Su%2C+Z">Zhongling Su</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+L">Linning Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+Z">Zhenxiang Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+R">Rong Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Hengjie Li</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+X">Xingcheng Zhang</a>, <a 
href="/search/cs?searchtype=author&amp;query=Zhai%2C+J">Jidong Zhai</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.14938v1-abstract-short" style="display: inline;"> Rendering large-scale 3D Gaussian Splatting (3DGS) model faces significant challenges in achieving real-time, high-fidelity performance on consumer-grade devices. Fully realizing the potential of 3DGS in applications such as virtual reality (VR) requires addressing critical system-level challenges to support real-time, immersive experiences. We propose GS-Cache, an end-to-end framework that seamle&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.14938v1-abstract-full').style.display = 'inline'; document.getElementById('2502.14938v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.14938v1-abstract-full" style="display: none;"> Rendering large-scale 3D Gaussian Splatting (3DGS) model faces significant challenges in achieving real-time, high-fidelity performance on consumer-grade devices. Fully realizing the potential of 3DGS in applications such as virtual reality (VR) requires addressing critical system-level challenges to support real-time, immersive experiences. We propose GS-Cache, an end-to-end framework that seamlessly integrates 3DGS&#39;s advanced representation with a highly optimized rendering system. GS-Cache introduces a cache-centric pipeline to eliminate redundant computations, an efficiency-aware scheduler for elastic multi-GPU rendering, and optimized CUDA kernels to overcome computational bottlenecks. 
This synergy between 3DGS and system design enables GS-Cache to achieve up to 5.35x performance improvement, 35% latency reduction, and 42% lower GPU memory usage, supporting 2K binocular rendering at over 120 FPS with high visual quality. By bridging the gap between 3DGS&#39;s representation power and the demands of VR systems, GS-Cache establishes a scalable and efficient framework for real-time neural rendering in immersive environments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.14938v1-abstract-full').style.display = 'none'; document.getElementById('2502.14938v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.14885">arXiv:2502.14885</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.14885">pdf</a>, <a href="https://arxiv.org/format/2502.14885">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Pulmonary Tuberculosis Edge Diagnosis System Based on MindSpore Framework: Low-cost and High-precision Implementation with Ascend 310 Chip </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">HaoYu Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" 
id="2502.14885v1-abstract-short" style="display: inline;"> Pulmonary Tuberculosis (PTB) remains a major challenge for global health, especially in areas with poor medical resources, where access to specialized medical knowledge and diagnostic tools is limited. This paper presents an auxiliary diagnosis system for pulmonary tuberculosis based on Huawei MindSpore framework and Ascend310 edge computing chip. Using MobileNetV3 architecture and Softmax cross e&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.14885v1-abstract-full').style.display = 'inline'; document.getElementById('2502.14885v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.14885v1-abstract-full" style="display: none;"> Pulmonary Tuberculosis (PTB) remains a major challenge for global health, especially in areas with poor medical resources, where access to specialized medical knowledge and diagnostic tools is limited. This paper presents an auxiliary diagnosis system for pulmonary tuberculosis based on Huawei MindSpore framework and Ascend310 edge computing chip. Using MobileNetV3 architecture and Softmax cross entropy loss function with momentum optimizer. The system operates with FP16 hybrid accuracy on the Orange pie AIPro (Atlas 200 DK) edge device and performs well. In the test set containing 4148 chest images, the model accuracy reached 99.1\% (AUC = 0.99), and the equipment cost was controlled within \$150, providing affordable AI-assisted diagnosis scheme for primary care. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.14885v1-abstract-full').style.display = 'none'; document.getElementById('2502.14885v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.14734">arXiv:2502.14734</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.14734">pdf</a>, <a href="https://arxiv.org/format/2502.14734">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Sentence Smith: Formally Controllable Text Transformation and its Application to Evaluation of Text Embedding Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Hongji Li</a>, <a href="/search/cs?searchtype=author&amp;query=Michail%2C+A">Andrianos Michail</a>, <a href="/search/cs?searchtype=author&amp;query=Gubelmann%2C+R">Reto Gubelmann</a>, <a href="/search/cs?searchtype=author&amp;query=Clematide%2C+S">Simon Clematide</a>, <a href="/search/cs?searchtype=author&amp;query=Opitz%2C+J">Juri Opitz</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.14734v1-abstract-short" style="display: inline;"> We propose the Sentence Smith framework that enables controlled and specified manipulation of text meaning. It consists of three main steps: 1. Parsing a sentence into a semantic graph, 2. 
Applying human-designed semantic manipulation rules, and 3. Generating text from the manipulated graph. A final filtering step (4.) ensures the validity of the applied transformation. To demonstrate the utility&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.14734v1-abstract-full').style.display = 'inline'; document.getElementById('2502.14734v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.14734v1-abstract-full" style="display: none;"> We propose the Sentence Smith framework that enables controlled and specified manipulation of text meaning. It consists of three main steps: 1. Parsing a sentence into a semantic graph, 2. Applying human-designed semantic manipulation rules, and 3. Generating text from the manipulated graph. A final filtering step (4.) ensures the validity of the applied transformation. To demonstrate the utility of Sentence Smith in an application study, we use it to generate hard negative pairs that challenge text embedding models. Since the controllable generation makes it possible to clearly isolate different types of semantic shifts, we can gain deeper insights into the specific strengths and weaknesses of widely used text embedding models, also addressing an issue in current benchmarking where linguistic phenomena remain opaque. Human validation confirms that the generations produced by Sentence Smith are highly accurate. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.14734v1-abstract-full').style.display = 'none'; document.getElementById('2502.14734v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.14704">arXiv:2502.14704</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.14704">pdf</a>, <a href="https://arxiv.org/format/2502.14704">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Not All Data are Good Labels: On the Self-supervised Labeling for Time Series Forecasting </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yang%2C+Y">Yuxuan Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+D">Dalin Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Liang%2C+Y">Yuxuan Liang</a>, <a href="/search/cs?searchtype=author&amp;query=Lu%2C+H">Hua Lu</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+G">Gang Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Huan Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.14704v2-abstract-short" style="display: inline;"> Time Series Forecasting (TSF) is a crucial task in various domains, yet existing TSF models rely heavily on high-quality data and insufficiently exploit all available data. This paper explores a novel self-supervised approach to re-label time series datasets by inherently constructing candidate datasets. 
During the optimization of a simple reconstruction network, intermediates are used as pseudo l&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.14704v2-abstract-full').style.display = 'inline'; document.getElementById('2502.14704v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.14704v2-abstract-full" style="display: none;"> Time Series Forecasting (TSF) is a crucial task in various domains, yet existing TSF models rely heavily on high-quality data and insufficiently exploit all available data. This paper explores a novel self-supervised approach to re-label time series datasets by inherently constructing candidate datasets. During the optimization of a simple reconstruction network, intermediates are used as pseudo labels in a self-supervised paradigm, improving generalization for any predictor. We introduce the Self-Correction with Adaptive Mask (SCAM), which discards overfitted components and selectively replaces them with pseudo labels generated from reconstructions. Additionally, we incorporate Spectral Norm Regularization (SNR) to further suppress overfitting from a loss landscape perspective. Our experiments on eleven real-world datasets demonstrate that SCAM consistently improves the performance of various backbone models. This work offers a new perspective on constructing datasets and enhancing the generalization of TSF models through self-supervised learning. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.14704v2-abstract-full').style.display = 'none'; document.getElementById('2502.14704v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.14247">arXiv:2502.14247</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.14247">pdf</a>, <a href="https://arxiv.org/format/2502.14247">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Pandora3D: A Comprehensive Framework for High-Quality 3D Shape and Texture Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jiayu Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Shang%2C+T">Taizhang Shang</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+W">Weixuan Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+X">Xibin Song</a>, <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+Z">Ziang Cheng</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+S">Senbo Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+S">Shenzhou Chen</a>, <a 
href="/search/cs?searchtype=author&amp;query=Liu%2C+W">Weizhe Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Hongdong Li</a>, <a href="/search/cs?searchtype=author&amp;query=Ji%2C+P">Pan Ji</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.14247v2-abstract-short" style="display: inline;"> This report presents a comprehensive framework for generating high-quality 3D shapes and textures from diverse input prompts, including single images, multi-view images, and text descriptions. The framework consists of 3D shape generation and texture generation. (1). The 3D shape generation pipeline employs a Variational Autoencoder (VAE) to encode implicit 3D geometries into a latent space and a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.14247v2-abstract-full').style.display = 'inline'; document.getElementById('2502.14247v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.14247v2-abstract-full" style="display: none;"> This report presents a comprehensive framework for generating high-quality 3D shapes and textures from diverse input prompts, including single images, multi-view images, and text descriptions. The framework consists of 3D shape generation and texture generation. (1). The 3D shape generation pipeline employs a Variational Autoencoder (VAE) to encode implicit 3D geometries into a latent space and a diffusion network to generate latents conditioned on input prompts, with modifications to enhance model capacity. An alternative Artist-Created Mesh (AM) generation approach is also explored, yielding promising results for simpler geometries. (2). 
Texture generation involves a multi-stage process starting with frontal images generation followed by multi-view images generation, RGB-to-PBR texture conversion, and high-resolution multi-view texture refinement. A consistency scheduler is plugged into every stage, to enforce pixel-wise consistency among multi-view textures during inference, ensuring seamless integration. The pipeline demonstrates effective handling of diverse input formats, leveraging advanced neural architectures and novel methodologies to produce high-quality 3D content. This report details the system architecture, experimental results, and potential future directions to improve and expand the framework. The source code and pretrained weights are released at: https://github.com/Tencent/Tencent-XR-3DGen. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.14247v2-abstract-full').style.display = 'none'; document.getElementById('2502.14247v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Tencent XR 3D Gen</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.14235">arXiv:2502.14235</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.14235">pdf</a>, <a href="https://arxiv.org/format/2502.14235">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> OG-Gaussian: Occupancy Based Street Gaussians for Autonomous Driving </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Shen%2C+Y">Yedong Shen</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+X">Xinran Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Duan%2C+Y">Yifan Duan</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+S">Shiqi Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Heng Li</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+Y">Yilong Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Ji%2C+J">Jianmin Ji</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Y">Yanyong Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.14235v1-abstract-short" style="display: inline;"> Accurate and realistic 3D scene reconstruction enables the lifelike creation of autonomous driving simulation environments. 
With advancements in 3D Gaussian Splatting (3DGS), previous studies have applied it to reconstruct complex dynamic driving scenes. These methods typically require expensive LiDAR sensors and pre-annotated datasets of dynamic objects. To address these challenges, we propose OG&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.14235v1-abstract-full').style.display = 'inline'; document.getElementById('2502.14235v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.14235v1-abstract-full" style="display: none;"> Accurate and realistic 3D scene reconstruction enables the lifelike creation of autonomous driving simulation environments. With advancements in 3D Gaussian Splatting (3DGS), previous studies have applied it to reconstruct complex dynamic driving scenes. These methods typically require expensive LiDAR sensors and pre-annotated datasets of dynamic objects. To address these challenges, we propose OG-Gaussian, a novel approach that replaces LiDAR point clouds with Occupancy Grids (OGs) generated from surround-view camera images using Occupancy Prediction Network (ONet). Our method leverages the semantic information in OGs to separate dynamic vehicles from static street background, converting these grids into two distinct sets of initial point clouds for reconstructing both static and dynamic objects. Additionally, we estimate the trajectories and poses of dynamic objects through a learning-based approach, eliminating the need for complex manual annotations. Experiments on Waymo Open dataset demonstrate that OG-Gaussian is on par with the current state-of-the-art in terms of reconstruction quality and rendering speed, achieving an average PSNR of 35.13 and a rendering speed of 143 FPS, while significantly reducing computational costs and economic overhead. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.14235v1-abstract-full').style.display = 'none'; document.getElementById('2502.14235v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.14221">arXiv:2502.14221</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.14221">pdf</a>, <a href="https://arxiv.org/format/2502.14221">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> H3DE-Net: Efficient and Accurate 3D Landmark Detection in Medical Imaging </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Huang%2C+Z">Zhen Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+R">Ronghao Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xiaoqian Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Wei%2C+Y">Yangbo Wei</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+S">Suhua Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+X">Xiaoxin Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Han Li</a>, <a href="/search/cs?searchtype=author&amp;query=Yao%2C+Q">Qingsong Yao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.14221v1-abstract-short" style="display: inline;"> 3D landmark detection is a critical task in medical image 
analysis, and accurately detecting anatomical landmarks is essential for subsequent medical imaging tasks. However, mainstream deep learning methods in this field struggle to simultaneously capture fine-grained local features and model global spatial relationships, while maintaining a balance between accuracy and computational efficiency. L&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.14221v1-abstract-full').style.display = 'inline'; document.getElementById('2502.14221v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.14221v1-abstract-full" style="display: none;"> 3D landmark detection is a critical task in medical image analysis, and accurately detecting anatomical landmarks is essential for subsequent medical imaging tasks. However, mainstream deep learning methods in this field struggle to simultaneously capture fine-grained local features and model global spatial relationships, while maintaining a balance between accuracy and computational efficiency. Local feature extraction requires capturing fine-grained anatomical details, while global modeling requires understanding the spatial relationships within complex anatomical structures. The high-dimensional nature of 3D volume further exacerbates these challenges, as landmarks are sparsely distributed, leading to significant computational costs. Therefore, achieving efficient and precise 3D landmark detection remains a pressing challenge in medical image analysis. In this work, We propose a \textbf{H}ybrid \textbf{3}D \textbf{DE}tection \textbf{Net}(H3DE-Net), a novel framework that combines CNNs for local feature extraction with a lightweight attention mechanism designed to efficiently capture global dependencies in 3D volumetric data. This mechanism employs a hierarchical routing strategy to reduce computational cost while maintaining global context modeling. 
To our knowledge, H3DE-Net is the first 3D landmark detection model that integrates such a lightweight attention mechanism with CNNs. Additionally, integrating multi-scale feature fusion further enhances detection accuracy and robustness. Experimental results on a public CT dataset demonstrate that H3DE-Net achieves state-of-the-art (SOTA) performance, significantly improving accuracy and robustness, particularly in scenarios with missing landmarks or complex anatomical variations. We have already open-sourced our project, including code, data and model weights. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.14221v1-abstract-full').style.display = 'none'; document.getElementById('2502.14221v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.14012">arXiv:2502.14012</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.14012">pdf</a>, <a href="https://arxiv.org/format/2502.14012">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Other Computer Science">cs.OH</span> </div> </div> <p class="title is-5 mathjax"> A double-layer placement algorithm for integrated circuit-based modules on printed circuit board </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Hangyuan Li</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+Z">Zhaoyang Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Pang%2C+H">Haotian Pang</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+N">Ning Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Y">Yu Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.14012v1-abstract-short" style="display: inline;"> Considering that the physical design of printed circuit board (PCB) follows the principle of modularized design, this paper proposes an automatic placement algorithm for functional modules. 
We first model the placement problem as a mixed-variable optimization problem, and then develop tailored algorithms for global placement and legalization for the top-layer centralized placement subproblem and&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.14012v1-abstract-full').style.display = 'inline'; document.getElementById('2502.14012v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.14012v1-abstract-full" style="display: none;"> Considering that the physical design of printed circuit board (PCB) follows the principle of modularized design, this paper proposes an automatic placement algorithm for functional modules. We first model the placement problem as a mixed-variable optimization problem, and then develop tailored algorithms for global placement and legalization for the top-layer centralized placement subproblem and the bottom-layer pin-oriented placement subproblem. Numerical comparison demonstrates that the proposed mixed-variable optimization scheme can get optimized total wirelength of placement. Meanwhile, experimental results on several industrial PCB cases show that the developed centralized strategies can well accommodate the requirement of top-layer placement, and the pin-oriented global placement based on bin clustering contributes to optimized placement results meeting the requirement of pin-oriented design. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.14012v1-abstract-full').style.display = 'none'; document.getElementById('2502.14012v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.14005">arXiv:2502.14005</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.14005">pdf</a>, <a href="https://arxiv.org/format/2502.14005">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Smaller But Better: Unifying Layout Generation with Smaller Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+P">Peirong Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+J">Jiaxin Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Cao%2C+J">Jiahuan Cao</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Hongliang Li</a>, <a href="/search/cs?searchtype=author&amp;query=Jin%2C+L">Lianwen Jin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.14005v1-abstract-short" style="display: inline;"> We propose LGGPT, an LLM-based model tailored for unified layout generation. First, we propose Arbitrary Layout Instruction (ALI) and Universal Layout Response (ULR) as the uniform I/O template. ALI accommodates arbitrary layout generation task inputs across multiple layout domains, enabling LGGPT to unify both task-generic and domain-generic layout generation hitherto unexplored. 
Collectively, AL&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.14005v1-abstract-full').style.display = 'inline'; document.getElementById('2502.14005v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.14005v1-abstract-full" style="display: none;"> We propose LGGPT, an LLM-based model tailored for unified layout generation. First, we propose Arbitrary Layout Instruction (ALI) and Universal Layout Response (ULR) as the uniform I/O template. ALI accommodates arbitrary layout generation task inputs across multiple layout domains, enabling LGGPT to unify both task-generic and domain-generic layout generation hitherto unexplored. Collectively, ALI and ULR boast a succinct structure that forgoes superfluous tokens typically found in existing HTML-based formats, facilitating efficient instruction tuning and boosting unified generation performance. In addition, we propose an Interval Quantization Encoding (IQE) strategy that compresses ALI into a more condensed structure. IQE precisely preserves valid layout clues while eliminating the less informative placeholders, facilitating LGGPT to capture complex and variable layout generation conditions during the unified training process. Experimental results demonstrate that LGGPT achieves superior or on par performance compared to existing methods. Notably, LGGPT strikes a prominent balance between proficiency and efficiency with a compact 1.5B parameter LLM, which beats prior 7B or 175B models even in the most extensive and challenging unified scenario. Furthermore, we underscore the necessity of employing LLMs for unified layout generation and suggest that 1.5B could be an optimal parameter size by comparing LLMs of varying scales. Code is available at https://github.com/NiceRingNode/LGGPT. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.14005v1-abstract-full').style.display = 'none'; document.getElementById('2502.14005v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.13789">arXiv:2502.13789</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.13789">pdf</a>, <a href="https://arxiv.org/format/2502.13789">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> From Correctness to Comprehension: AI Agents for Personalized Error Diagnosis in Education </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Y">Yi-Fan Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Hang Li</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+D">Dingjie Song</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+L">Lichao Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+T">Tianlong Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Wen%2C+Q">Qingsong Wen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.13789v1-abstract-short" style="display: inline;"> Large Language Models (LLMs), such as GPT-4, have demonstrated impressive mathematical reasoning capabilities, achieving near-perfect performance on benchmarks like GSM8K. 
However, their application in personalized education remains limited due to an overemphasis on correctness over error diagnosis and feedback generation. Current models fail to provide meaningful insights into the causes of stude&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13789v1-abstract-full').style.display = 'inline'; document.getElementById('2502.13789v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.13789v1-abstract-full" style="display: none;"> Large Language Models (LLMs), such as GPT-4, have demonstrated impressive mathematical reasoning capabilities, achieving near-perfect performance on benchmarks like GSM8K. However, their application in personalized education remains limited due to an overemphasis on correctness over error diagnosis and feedback generation. Current models fail to provide meaningful insights into the causes of student mistakes, limiting their utility in educational contexts. To address these challenges, we present three key contributions. First, we introduce \textbf{MathCCS} (Mathematical Classification and Constructive Suggestions), a multi-modal benchmark designed for systematic error analysis and tailored feedback. MathCCS includes real-world problems, expert-annotated error categories, and longitudinal student data. Evaluations of state-of-the-art models, including \textit{Qwen2-VL}, \textit{LLaVA-OV}, \textit{Claude-3.5-Sonnet} and \textit{GPT-4o}, reveal that none achieved classification accuracy above 30\% or generated high-quality suggestions (average scores below 4/10), highlighting a significant gap from human-level performance. Second, we develop a sequential error analysis framework that leverages historical data to track trends and improve diagnostic precision. 
Finally, we propose a multi-agent collaborative framework that combines a Time Series Agent for historical analysis and an MLLM Agent for real-time refinement, enhancing error classification and feedback generation. Together, these contributions provide a robust platform for advancing personalized education, bridging the gap between current AI capabilities and the demands of real-world teaching. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13789v1-abstract-full').style.display = 'none'; document.getElementById('2502.13789v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.13753">arXiv:2502.13753</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.13753">pdf</a>, <a href="https://arxiv.org/format/2502.13753">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> SCALAR: Scientific Citation-based Live Assessment of Long-context Academic Reasoning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wang%2C+R">Renxi Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Mu%2C+H">Honglin Mu</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+L">Liqun Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Lin%2C+L">Lizhi Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Feng%2C+Y">Yunlong Feng</a>, <a href="/search/cs?searchtype=author&amp;query=Baldwin%2C+T">Timothy Baldwin</a>, <a 
href="/search/cs?searchtype=author&amp;query=Han%2C+X">Xudong Han</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Haonan Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.13753v1-abstract-short" style="display: inline;"> Evaluating large language models&#39; (LLMs) long-context understanding capabilities remains challenging. We present SCALAR (Scientific Citation-based Live Assessment of Long-context Academic Reasoning), a novel benchmark that leverages academic papers and their citation networks. SCALAR features automatic generation of high-quality ground truth labels without human annotation, controllable difficulty&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13753v1-abstract-full').style.display = 'inline'; document.getElementById('2502.13753v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.13753v1-abstract-full" style="display: none;"> Evaluating large language models&#39; (LLMs) long-context understanding capabilities remains challenging. We present SCALAR (Scientific Citation-based Live Assessment of Long-context Academic Reasoning), a novel benchmark that leverages academic papers and their citation networks. SCALAR features automatic generation of high-quality ground truth labels without human annotation, controllable difficulty levels, and a dynamic updating mechanism that prevents data contamination. Using ICLR 2025 papers, we evaluate 8 state-of-the-art LLMs, revealing key insights about their capabilities and limitations in processing long scientific documents across different context lengths and reasoning types. Our benchmark provides a reliable and sustainable way to track progress in long-context understanding as LLM capabilities evolve. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13753v1-abstract-full').style.display = 'none'; document.getElementById('2502.13753v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.13569">arXiv:2502.13569</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.13569">pdf</a>, <a href="https://arxiv.org/format/2502.13569">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Model Evolution Framework with Genetic Algorithm for Multi-Task Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yu%2C+Y">Yan Yu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+W">Wengang Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+Y">Yaodong Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Lu%2C+W">Wanxuan Lu</a>, <a href="/search/cs?searchtype=author&amp;query=Hou%2C+Y">Yingyan Hou</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Houqiang Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.13569v1-abstract-short" style="display: inline;"> Multi-task reinforcement learning employs a single policy to complete various tasks, aiming to develop an agent with generalizability across different scenarios. 
Given the shared characteristics of tasks, the agent&#39;s learning efficiency can be enhanced through parameter sharing. Existing approaches typically use a routing network to generate specific routes for each task and reconstruct a set of m&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13569v1-abstract-full').style.display = 'inline'; document.getElementById('2502.13569v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.13569v1-abstract-full" style="display: none;"> Multi-task reinforcement learning employs a single policy to complete various tasks, aiming to develop an agent with generalizability across different scenarios. Given the shared characteristics of tasks, the agent&#39;s learning efficiency can be enhanced through parameter sharing. Existing approaches typically use a routing network to generate specific routes for each task and reconstruct a set of modules into diverse models to complete multiple tasks simultaneously. However, due to the inherent difference between tasks, it is crucial to allocate resources based on task difficulty, which is constrained by the model&#39;s structure. To this end, we propose a Model Evolution framework with Genetic Algorithm (MEGA), which enables the model to evolve during training according to the difficulty of the tasks. When the current model is insufficient for certain tasks, the framework will automatically incorporate additional modules, enhancing the model&#39;s capabilities. Moreover, to adapt to our model evolution framework, we introduce a genotype module-level model, using binary sequences as genotype policies for model reconstruction, while leveraging a non-gradient genetic algorithm to optimize these genotype policies. 
Unlike routing networks with fixed output dimensions, our approach allows for the dynamic adjustment of the genotype policy length, enabling it to accommodate models with a varying number of modules. We conducted experiments on various robotics manipulation tasks in the Meta-World benchmark. Our state-of-the-art performance demonstrated the effectiveness of the MEGA framework. We will release our source code to the public. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13569v1-abstract-full').style.display = 'none'; document.getElementById('2502.13569v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.13542">arXiv:2502.13542</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.13542">pdf</a>, <a href="https://arxiv.org/format/2502.13542">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Activation-aware Probe-Query: Effective Key-Value Retrieval for Long-Context LLMs Inference </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Xiao%2C+Q">Qingfa Xiao</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+J">Jiachuan Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Haoyang Li</a>, <a href="/search/cs?searchtype=author&amp;query=Deng%2C+C">Cheng Deng</a>, <a 
href="/search/cs?searchtype=author&amp;query=Tang%2C+J">Jiaqi Tang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+S">Shuangyin Li</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Y">Yongqi Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+J">Jun Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+L">Lei Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.13542v1-abstract-short" style="display: inline;"> Recent advances in large language models (LLMs) have showcased exceptional performance in long-context tasks, while facing significant inference efficiency challenges with limited GPU memory. Existing solutions first proposed the sliding-window approach to accumulate a set of historical \textbf{key-value} (KV) pairs for reuse, then further improvements selectively retain its subsets at each step.&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13542v1-abstract-full').style.display = 'inline'; document.getElementById('2502.13542v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.13542v1-abstract-full" style="display: none;"> Recent advances in large language models (LLMs) have showcased exceptional performance in long-context tasks, while facing significant inference efficiency challenges with limited GPU memory. Existing solutions first proposed the sliding-window approach to accumulate a set of historical \textbf{key-value} (KV) pairs for reuse, then further improvements selectively retain its subsets at each step. However, due to the sparse attention distribution across a long context, it is hard to identify and recall relevant KV pairs, as the attention is distracted by massive candidate pairs. 
Additionally, we found it promising to select representative tokens as probe-Query in each sliding window to effectively represent the entire context, which is an approach overlooked by existing methods. Thus, we propose \textbf{ActQKV}, a training-free, \textbf{Act}ivation-aware approach that dynamically determines probe-\textbf{Q}uery and leverages it to retrieve the relevant \textbf{KV} pairs for inference. Specifically, ActQKV monitors a token-level indicator, Activation Bias, within each context window, enabling the proper construction of probe-Query for retrieval at pre-filling stage. To accurately recall the relevant KV pairs and minimize the irrelevant ones, we design a dynamic KV cut-off mechanism guided by information density across layers at the decoding stage. Experiments on the Long-Bench and $\infty$ Benchmarks demonstrate its state-of-the-art performance with competitive inference quality and resource efficiency. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13542v1-abstract-full').style.display = 'none'; document.getElementById('2502.13542v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.13533">arXiv:2502.13533</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.13533">pdf</a>, <a href="https://arxiv.org/format/2502.13533">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Train Small, Infer Large: Memory-Efficient LoRA Training for Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+J">Jun Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+J">Jue Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Huan Li</a>, <a href="/search/cs?searchtype=author&amp;query=Shou%2C+L">Lidan Shou</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+K">Ke Chen</a>, <a href="/search/cs?searchtype=author&amp;query=You%2C+Y">Yang You</a>, <a href="/search/cs?searchtype=author&amp;query=Xie%2C+G">Guiming Xie</a>, <a href="/search/cs?searchtype=author&amp;query=Gong%2C+X">Xuejian Gong</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+K">Kunlong Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.13533v1-abstract-short" style="display: inline;"> Large Language Models (LLMs) have significantly advanced natural language processing with exceptional task generalization capabilities. 
Low-Rank Adaption (LoRA) offers a cost-effective fine-tuning solution, freezing the original model parameters and training only lightweight, low-rank adapter matrices. However, the memory footprint of LoRA is largely dominated by the original model parameters. To&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13533v1-abstract-full').style.display = 'inline'; document.getElementById('2502.13533v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.13533v1-abstract-full" style="display: none;"> Large Language Models (LLMs) have significantly advanced natural language processing with exceptional task generalization capabilities. Low-Rank Adaption (LoRA) offers a cost-effective fine-tuning solution, freezing the original model parameters and training only lightweight, low-rank adapter matrices. However, the memory footprint of LoRA is largely dominated by the original model parameters. To mitigate this, we propose LoRAM, a memory-efficient LoRA training scheme founded on the intuition that many neurons in over-parameterized LLMs have low training utility but are essential for inference. LoRAM presents a unique twist: it trains on a pruned (small) model to obtain pruned low-rank matrices, which are then recovered and utilized with the original (large) model for inference. Additionally, minimal-cost continual pre-training, performed by the model publishers in advance, aligns the knowledge discrepancy between pruned and original models. Our extensive experiments demonstrate the efficacy of LoRAM across various pruning strategies and downstream tasks. For a model with 70 billion parameters, LoRAM enables training on a GPU with only 20G HBM, replacing an A100-80G GPU for LoRA training and 15 GPUs for full fine-tuning. 
Specifically, QLoRAM implemented by structured pruning combined with 4-bit quantization, for LLaMA-3.1-70B (LLaMA-2-70B), reduces the parameter storage cost that dominates the memory usage in low-rank matrix training by 15.81$\times$ (16.95$\times$), while achieving dominant performance gains over both the original LLaMA-3.1-70B (LLaMA-2-70B) and LoRA-trained LLaMA-3.1-8B (LLaMA-2-13B). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13533v1-abstract-full').style.display = 'none'; document.getElementById('2502.13533v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at ICLR 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.13480">arXiv:2502.13480</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.13480">pdf</a>, <a href="https://arxiv.org/format/2502.13480">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Astra: Efficient and Money-saving Automatic Parallel Strategies Search on Heterogeneous GPUs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wang%2C+P">Peiran Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Haibing Li</a>, <a 
href="/search/cs?searchtype=author&amp;query=Haohan%2C+F">Fu Haohan</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+S">Shiyong Li</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Yanpeng Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Shen%2C+D">Dou Shen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.13480v1-abstract-short" style="display: inline;"> In this paper, we introduce an efficient and money-saving automatic parallel strategies search framework on heterogeneous GPUs: Astra. First, Astra searches for the efficiency-optimal parallel strategy in both GPU configurations search space (GPU types and GPU numbers) and parallel parameters search space. Then, Astra also provides the solution on heterogeneous GPUs by mathematically modeling the&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13480v1-abstract-full').style.display = 'inline'; document.getElementById('2502.13480v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.13480v1-abstract-full" style="display: none;"> In this paper, we introduce an efficient and money-saving automatic parallel strategies search framework on heterogeneous GPUs: Astra. First, Astra searches for the efficiency-optimal parallel strategy in both GPU configurations search space (GPU types and GPU numbers) and parallel parameters search space. Then, Astra also provides the solution on heterogeneous GPUs by mathematically modeling the time consumption of heterogeneous training. At last, Astra is the first to propose the automatic parallel strategy search on money-saving. The experiment results demonstrate that Astra can achieve better throughput than expert-designed strategies. 
The search time cost for Astra can also be limited to 1.27 seconds in a single-GPU setting and less than 1.35 minutes in a heterogeneous-GPU setting on average with an accuracy of over 95%. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13480v1-abstract-full').style.display = 'none'; document.getElementById('2502.13480v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.13428">arXiv:2502.13428</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.13428">pdf</a>, <a href="https://arxiv.org/format/2502.13428">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> MCTS-KBQA: Monte Carlo Tree Search for Knowledge Base Question Answering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Xiong%2C+G">Guanming Xiong</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Haochen Li</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+W">Wen Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.13428v1-abstract-short" style="display: inline;"> This study explores how to enhance the reasoning capabilities of large language models (LLMs) in knowledge base question answering (KBQA) by 
leveraging Monte Carlo Tree Search (MCTS). Semantic parsing-based KBQA methods are particularly challenging as these approaches require locating elements from knowledge bases and generating logical forms, demanding not only extensive annotated data but also s&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13428v1-abstract-full').style.display = 'inline'; document.getElementById('2502.13428v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.13428v1-abstract-full" style="display: none;"> This study explores how to enhance the reasoning capabilities of large language models (LLMs) in knowledge base question answering (KBQA) by leveraging Monte Carlo Tree Search (MCTS). Semantic parsing-based KBQA methods are particularly challenging as these approaches require locating elements from knowledge bases and generating logical forms, demanding not only extensive annotated data but also strong reasoning capabilities. Although recent approaches leveraging LLMs as agents have demonstrated considerable potential, these studies are inherently constrained by their linear decision-making processes. To address this limitation, we propose an MCTS-based framework that enhances LLMs&#39; reasoning capabilities through tree search methodology. We devise a carefully designed step-wise reward mechanism that requires only direct prompting of open-source instruction LLMs without additional fine-tuning. Experimental results demonstrate that our approach significantly outperforms linear decision-making methods, particularly in low-resource scenarios. Additionally, we contribute new data resources to the KBQA community by annotating intermediate reasoning processes for existing question-SPARQL datasets using distant supervision. 
Experimental results on the extended dataset demonstrate that our method achieves comparable performance to fully supervised models while using significantly less training data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13428v1-abstract-full').style.display = 'none'; document.getElementById('2502.13428v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.13352">arXiv:2502.13352</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.13352">pdf</a>, <a href="https://arxiv.org/format/2502.13352">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Emerging Technologies">cs.ET</span> </div> </div> <p class="title is-5 mathjax"> Integrated Sensing and Communication for 6G Holographic Digital Twins </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+H">Haijun Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Z">Ziyang Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+X">Xiangnan Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+W">Wei Li</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Haojin Li</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+C">Chen Sun</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" 
id="2502.13352v1-abstract-short" style="display: inline;"> With the advent of 6G networks, offering ultra-high bandwidth and ultra-low latency, coupled with the enhancement of terminal device resolutions, holographic communication is gradually becoming a reality. Holographic digital twin (HDT) is considered one of the key applications of holographic communication, capable of creating virtual replicas for real-time mapping and prediction of physical entity sta&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13352v1-abstract-full').style.display = 'inline'; document.getElementById('2502.13352v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.13352v1-abstract-full" style="display: none;"> With the advent of 6G networks, offering ultra-high bandwidth and ultra-low latency, coupled with the enhancement of terminal device resolutions, holographic communication is gradually becoming a reality. Holographic digital twin (HDT) is considered one of the key applications of holographic communication, capable of creating virtual replicas for real-time mapping and prediction of physical entity states, and performing three-dimensional reproduction of spatial information. In this context, integrated sensing and communication (ISAC) is expected to be a crucial pathway for providing data sources to HDT. This paper proposes a four-layer architecture assisted by ISAC for HDT, integrating emerging paradigms and key technologies to achieve low-cost, high-precision environmental data collection for constructing HDT. Specifically, to enhance sensing resolution, we explore super-resolution techniques from the perspectives of parameter estimation and point cloud construction. 
Additionally, we focus on multi-point collaborative sensing for constructing HDT, and provide a comprehensive review of four key techniques: node selection, multi-band collaboration, cooperative beamforming, and data fusion. Finally, we highlight several interesting research directions to guide and inspire future work. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13352v1-abstract-full').style.display = 'none'; document.getElementById('2502.13352v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.13125">arXiv:2502.13125</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.13125">pdf</a>, <a href="https://arxiv.org/format/2502.13125">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> RuozhiBench: Evaluating LLMs with Logical Fallacies and Misleading Premises </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhai%2C+Z">Zenan Zhai</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Hao Li</a>, <a href="/search/cs?searchtype=author&amp;query=Han%2C+X">Xudong Han</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Z">Zhenxuan Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Y">Yixuan Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Baldwin%2C+T">Timothy Baldwin</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Haonan Li</a> </p> <p class="abstract mathjax"> <span 
class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.13125v1-abstract-short" style="display: inline;"> Recent advances in large language models (LLMs) have shown that they can answer questions requiring complex reasoning. However, their ability to identify and respond to text containing logical fallacies or deliberately misleading premises remains less studied. To address this gap, we introduce RuozhiBench, a bilingual dataset comprising 677 carefully curated questions that contain various forms of&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13125v1-abstract-full').style.display = 'inline'; document.getElementById('2502.13125v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.13125v1-abstract-full" style="display: none;"> Recent advances in large language models (LLMs) have shown that they can answer questions requiring complex reasoning. However, their ability to identify and respond to text containing logical fallacies or deliberately misleading premises remains less studied. To address this gap, we introduce RuozhiBench, a bilingual dataset comprising 677 carefully curated questions that contain various forms of deceptive reasoning, meticulously crafted through extensive human effort and expert review. In a comprehensive evaluation of 17 LLMs from 5 Series over RuozhiBench using both open-ended and two-choice formats, we conduct extensive analyses on evaluation protocols and result patterns. Despite their high scores on conventional benchmarks, these models showed limited ability to detect and reason correctly about logical fallacies, with even the best-performing model, Claude-3-haiku, achieving only 62% accuracy compared to human accuracy of more than 90%. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13125v1-abstract-full').style.display = 'none'; document.getElementById('2502.13125v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12958">arXiv:2502.12958</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.12958">pdf</a>, <a href="https://arxiv.org/format/2502.12958">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Databases">cs.DB</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/ICDE60146.2024.00173">10.1109/ICDE60146.2024.00173 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Preventing the Popular Item Embedding Based Attack in Federated Recommendations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+J">Jun Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Huan Li</a>, <a href="/search/cs?searchtype=author&amp;query=Rong%2C+D">Dazhong Rong</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+Y">Yan Zhao</a>, <a 
href="/search/cs?searchtype=author&amp;query=Chen%2C+K">Ke Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Shou%2C+L">Lidan Shou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.12958v1-abstract-short" style="display: inline;"> Privacy concerns have led to the rise of federated recommender systems (FRS), which can create personalized models across distributed clients. However, FRS is vulnerable to poisoning attacks, where malicious users manipulate gradients to promote their target items intentionally. Existing attacks against FRS have limitations, as they depend on specific models and prior knowledge, restricting their&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12958v1-abstract-full').style.display = 'inline'; document.getElementById('2502.12958v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.12958v1-abstract-full" style="display: none;"> Privacy concerns have led to the rise of federated recommender systems (FRS), which can create personalized models across distributed clients. However, FRS is vulnerable to poisoning attacks, where malicious users manipulate gradients to promote their target items intentionally. Existing attacks against FRS have limitations, as they depend on specific models and prior knowledge, restricting their real-world applicability. In our exploration of practical FRS vulnerabilities, we devise a model-agnostic and prior-knowledge-free attack, named PIECK (Popular Item Embedding based Attack). The core module of PIECK is popular item mining, which leverages embedding changes during FRS training to effectively identify the popular items. 
Built upon the core module, PIECK branches into two diverse solutions: The PIECKIPE solution employs an item popularity enhancement module, which aligns the embeddings of targeted items with the mined popular items to increase item exposure. The PIECKUEA further enhances the robustness of the attack by using a user embedding approximation module, which approximates private user embeddings using mined popular items. Upon identifying PIECK, we evaluate existing federated defense methods and find them ineffective against PIECK, as poisonous gradients inevitably overwhelm the cold target items. We then propose a novel defense method by introducing two regularization terms during user training, which constrain item popularity enhancement and user embedding approximation while preserving FRS performance. We evaluate PIECK and its defense across two base models, three real datasets, four top-tier attacks, and six general defense methods, affirming the efficacy of both PIECK and its defense. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12958v1-abstract-full').style.display = 'none'; document.getElementById('2502.12958v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at ICDE 2024, Extension</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12900">arXiv:2502.12900</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.12900">pdf</a>, <a href="https://arxiv.org/format/2502.12900">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> Soundwave: Less is More for Speech-Text Alignment in LLMs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Y">Yuhao Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Z">Zhiheng Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Bu%2C+F">Fan Bu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+R">Ruiyu Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+B">Benyou Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Haizhou Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.12900v1-abstract-short" style="display: inline;"> Existing end-to-end speech large language models (LLMs) usually rely on large-scale annotated data for training, while data-efficient training has not been discussed in depth. We focus on two fundamental problems between speech and text: the representation space gap and sequence length inconsistency. 
We propose Soundwave, which utilizes an efficient training strategy and a novel architecture to ad&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12900v1-abstract-full').style.display = 'inline'; document.getElementById('2502.12900v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.12900v1-abstract-full" style="display: none;"> Existing end-to-end speech large language models (LLMs) usually rely on large-scale annotated data for training, while data-efficient training has not been discussed in depth. We focus on two fundamental problems between speech and text: the representation space gap and sequence length inconsistency. We propose Soundwave, which utilizes an efficient training strategy and a novel architecture to address these issues. Results show that Soundwave outperforms the advanced Qwen2-Audio in speech translation and AIR-Bench speech tasks, using only one-fiftieth of the training data. Further analysis shows that Soundwave still retains its intelligence during conversation. The project is available at https://github.com/FreedomIntelligence/Soundwave. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12900v1-abstract-full').style.display = 'none'; document.getElementById('2502.12900v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12893">arXiv:2502.12893</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.12893">pdf</a>, <a href="https://arxiv.org/format/2502.12893">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> H-CoT: Hijacking the Chain-of-Thought Safety Reasoning Mechanism to Jailbreak Large Reasoning Models, Including OpenAI o1/o3, DeepSeek-R1, and Gemini 2.0 Flash Thinking </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Kuo%2C+M">Martin Kuo</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+J">Jianyi Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Ding%2C+A">Aolin Ding</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Q">Qinsi Wang</a>, <a href="/search/cs?searchtype=author&amp;query=DiValentin%2C+L">Louis DiValentin</a>, <a href="/search/cs?searchtype=author&amp;query=Bao%2C+Y">Yujia Bao</a>, <a href="/search/cs?searchtype=author&amp;query=Wei%2C+W">Wei Wei</a>, <a href="/search/cs?searchtype=author&amp;query=Juan%2C+D">Da-Cheng Juan</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Hai Li</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Y">Yiran Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.12893v1-abstract-short" style="display: inline;"> Large Reasoning Models (LRMs) have recently extended their powerful reasoning capabilities to safety checks&mdash;using chain-of-thought reasoning to decide whether a request should be answered. 
While this new approach offers a promising route for balancing model utility and safety, its robustness remains underexplored. To address this gap, we introduce Malicious-Educator, a benchmark that disguises ext&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12893v1-abstract-full').style.display = 'inline'; document.getElementById('2502.12893v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.12893v1-abstract-full" style="display: none;"> Large Reasoning Models (LRMs) have recently extended their powerful reasoning capabilities to safety checks&mdash;using chain-of-thought reasoning to decide whether a request should be answered. While this new approach offers a promising route for balancing model utility and safety, its robustness remains underexplored. To address this gap, we introduce Malicious-Educator, a benchmark that disguises extremely dangerous or malicious requests beneath seemingly legitimate educational prompts. Our experiments reveal severe security flaws in popular commercial-grade LRMs, including OpenAI o1/o3, DeepSeek-R1, and Gemini 2.0 Flash Thinking. For instance, although OpenAI&#39;s o1 model initially maintains a high refusal rate of about 98%, subsequent model updates significantly compromise its safety; and attackers can easily extract criminal strategies from DeepSeek-R1 and Gemini 2.0 Flash Thinking without any additional tricks. To further highlight these vulnerabilities, we propose Hijacking Chain-of-Thought (H-CoT), a universal and transferable attack method that leverages the model&#39;s own displayed intermediate reasoning to jailbreak its safety reasoning mechanism. Under H-CoT, refusal rates sharply decline&mdash;dropping from 98% to below 2%&mdash;and, in some instances, even transform initially cautious tones into ones that are willing to provide harmful content. 
We hope these findings underscore the urgent need for more robust safety mechanisms to preserve the benefits of advanced reasoning capabilities without compromising ethical standards. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12893v1-abstract-full').style.display = 'none'; document.getElementById('2502.12893v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12674">arXiv:2502.12674</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.12674">pdf</a>, <a href="https://arxiv.org/format/2502.12674">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> SATA: Safe and Adaptive Torque-Based Locomotion Policies Inspired by Animal Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+P">Peizhuo Li</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Hongyi Li</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+G">Ge Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+J">Jin Cheng</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+X">Xinrong Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Bellegarda%2C+G">Guillaume Bellegarda</a>, <a href="/search/cs?searchtype=author&amp;query=Shafiee%2C+M">Milad Shafiee</a>, <a href="/search/cs?searchtype=author&amp;query=Cao%2C+Y">Yuhong Cao</a>, 
<a href="/search/cs?searchtype=author&amp;query=Ijspeert%2C+A">Auke Ijspeert</a>, <a href="/search/cs?searchtype=author&amp;query=Sartoretti%2C+G">Guillaume Sartoretti</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.12674v1-abstract-short" style="display: inline;"> Despite recent advances in learning-based controllers for legged robots, deployments in human-centric environments remain limited by safety concerns. Most of these approaches use position-based control, where policies output target joint angles that must be processed by a low-level controller (e.g., PD or impedance controllers) to compute joint torques. Although impressive results have been achiev&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12674v1-abstract-full').style.display = 'inline'; document.getElementById('2502.12674v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.12674v1-abstract-full" style="display: none;"> Despite recent advances in learning-based controllers for legged robots, deployments in human-centric environments remain limited by safety concerns. Most of these approaches use position-based control, where policies output target joint angles that must be processed by a low-level controller (e.g., PD or impedance controllers) to compute joint torques. Although impressive results have been achieved in controlled real-world scenarios, these methods often struggle with compliance and adaptability when encountering environments or disturbances unseen during training, potentially resulting in extreme or unsafe behaviors. 
Inspired by how animals achieve smooth and adaptive movements by controlling muscle extension and contraction, torque-based policies offer a promising alternative by enabling precise and direct control of the actuators in torque space. In principle, this approach facilitates more effective interactions with the environment, resulting in safer and more adaptable behaviors. However, challenges such as a highly nonlinear state space and inefficient exploration during training have hindered their broader adoption. To address these limitations, we propose SATA, a bio-inspired framework that mimics key biomechanical principles and adaptive learning mechanisms observed in animal locomotion. Our approach effectively addresses the inherent challenges of learning torque-based policies by significantly improving early-stage exploration, leading to high-performance final policies. Remarkably, our method achieves zero-shot sim-to-real transfer. Our experimental results indicate that SATA demonstrates remarkable compliance and safety, even in challenging environments such as soft/slippery terrain or narrow passages, and under significant external disturbances, highlighting its potential for practical deployments in human-centric and safety-critical scenarios. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12674v1-abstract-full').style.display = 'none'; document.getElementById('2502.12674v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12518">arXiv:2502.12518</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.12518">pdf</a>, <a href="https://arxiv.org/ps/2502.12518">ps</a>, <a href="https://arxiv.org/format/2502.12518">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> New Constant Dimension Codes From the Inserting Mixed Dimension Construction and Multilevel Construction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Han Li</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+F">Fang-Wei Fu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.12518v1-abstract-short" style="display: inline;"> Constant dimension codes (CDCs) are essential for error correction in random network coding. A fundamental problem of CDCs is to determine their maximal possible size for given parameters. Inserting construction and multilevel construction are two effective techniques for constructing CDCs. We first provide a sufficient condition for a subspace to be added to the code from the mixed dimension cons&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12518v1-abstract-full').style.display = 'inline'; document.getElementById('2502.12518v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.12518v1-abstract-full" style="display: none;"> Constant dimension codes (CDCs) are essential for error correction in random network coding. 
A fundamental problem of CDCs is to determine their maximal possible size for given parameters. Inserting construction and multilevel construction are two effective techniques for constructing CDCs. We first provide a sufficient condition for a subspace to be added to the code from the mixed dimension construction in Lao et al. (IEEE Trans. Inf. Theory 69(7): 4333-4344, 2023). By appropriately combining matrix blocks from small CDCs and rank-metric codes, we introduce three inserting constructions based on the mixed dimension construction. Furthermore, the mixed dimension construction and these inserting constructions are improved by the multilevel construction that is based on lifting rank-restricted Ferrers diagram rank-metric codes. Our constructions yield some new lower bounds for CDCs, which are superior to the previously best-known ones. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12518v1-abstract-full').style.display = 'none'; document.getElementById('2502.12518v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Submitted for possible publication</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12494">arXiv:2502.12494</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.12494">pdf</a>, <a href="https://arxiv.org/format/2502.12494">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> EDGE: Efficient Data Selection for LLM Agents via Guideline Effectiveness </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Y">Yunxiao Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Xiong%2C+G">Guanming Xiong</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Haochen Li</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+W">Wen Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.12494v1-abstract-short" style="display: inline;"> Large Language Models (LLMs) have shown remarkable capabilities as AI agents. However, existing methods for enhancing LLM-agent abilities often lack a focus on data quality, leading to inefficiencies and suboptimal results in both fine-tuning and prompt engineering. To address this issue, we introduce EDGE, a novel approach for identifying informative samples without needing golden answers. 
We pro&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12494v1-abstract-full').style.display = 'inline'; document.getElementById('2502.12494v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.12494v1-abstract-full" style="display: none;"> Large Language Models (LLMs) have shown remarkable capabilities as AI agents. However, existing methods for enhancing LLM-agent abilities often lack a focus on data quality, leading to inefficiencies and suboptimal results in both fine-tuning and prompt engineering. To address this issue, we introduce EDGE, a novel approach for identifying informative samples without needing golden answers. We propose the Guideline Effectiveness (GE) metric, which selects challenging samples by measuring the impact of human-provided guidelines in multi-turn interaction tasks. A low GE score indicates that the human expertise required for a sample is missing from the guideline, making the sample more informative. By selecting samples with low GE scores, we can improve the efficiency and outcomes of both prompt engineering and fine-tuning processes for LLMs. Extensive experiments validate the performance of our method. Our method achieves competitive results on the HotpotQA and WebShop and datasets, requiring 75\% and 50\% less data, respectively, while outperforming existing methods. We also provide a fresh perspective on the data quality of LLM-agent fine-tuning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12494v1-abstract-full').style.display = 'none'; document.getElementById('2502.12494v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12167">arXiv:2502.12167</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.12167">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> TastepepAI, An artificial intelligence platform for taste peptide de novo design </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yue%2C+J">Jianda Yue</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+T">Tingting Li</a>, <a href="/search/cs?searchtype=author&amp;query=Ouyang%2C+J">Jian Ouyang</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+J">Jiawei Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Tan%2C+H">Hua Tan</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Z">Zihui Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Han%2C+C">Changsheng Han</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Huanyu Li</a>, <a href="/search/cs?searchtype=author&amp;query=Liang%2C+S">Songping Liang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Z">Zhonghua Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Z">Zhonghua Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Ying Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.12167v1-abstract-short" style="display: inline;"> Taste peptides have emerged as promising natural flavoring agents attributed to their unique organoleptic properties, high safety profile, and potential health benefits. 
However, the de novo identification of taste peptides derived from animal, plant, or microbial sources remains a time-consuming and resource-intensive process, significantly impeding their widespread application in the food indust&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12167v1-abstract-full').style.display = 'inline'; document.getElementById('2502.12167v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.12167v1-abstract-full" style="display: none;"> Taste peptides have emerged as promising natural flavoring agents attributed to their unique organoleptic properties, high safety profile, and potential health benefits. However, the de novo identification of taste peptides derived from animal, plant, or microbial sources remains a time-consuming and resource-intensive process, significantly impeding their widespread application in the food industry. Here, we present TastePepAI, a comprehensive artificial intelligence framework for customized taste peptide design and safety assessment. As the key element of this framework, a loss-supervised adaptive variational autoencoder (LA-VAE) is implemented to efficiently optimizes the latent representation of sequences during training and facilitates the generation of target peptides with desired taste profiles. Notably, our model incorporates a novel taste-avoidance mechanism, allowing for selective flavor exclusion. Subsequently, our in-house developed toxicity prediction algorithm (SpepToxPred) is integrated in the framework to undergo rigorous safety evaluation of generated peptides. Using this integrated platform, we successfully identified 73 peptides exhibiting sweet, salty, and umami, significantly expanding the current repertoire of taste peptides. 
This work demonstrates the potential of TastePepAI in accelerating taste peptide discovery for food applications and provides a versatile framework adaptable to broader peptide engineering challenges. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12167v1-abstract-full').style.display = 'none'; document.getElementById('2502.12167v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">40 pages, 6 figures, research article</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11678">arXiv:2502.11678</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.11678">pdf</a>, <a href="https://arxiv.org/format/2502.11678">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Exploring LLM-based Student Simulation for Metacognitive Cultivation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Haoxuan Li</a>, <a href="/search/cs?searchtype=author&amp;query=Yu%2C+J">Jifan Yu</a>, <a href="/search/cs?searchtype=author&amp;query=Cong%2C+X">Xin Cong</a>, <a href="/search/cs?searchtype=author&amp;query=Dang%2C+Y">Yang Dang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhan%2C+Y">Yisi Zhan</a>, <a 
href="/search/cs?searchtype=author&amp;query=Liu%2C+H">Huiqin Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Z">Zhiyuan Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.11678v1-abstract-short" style="display: inline;"> Metacognitive education plays a crucial role in cultivating students&#39; self-regulation and reflective thinking, providing essential support for those with learning difficulties through academic advising. Simulating students with insufficient learning capabilities using large language models offers a promising approach to refining pedagogical methods without ethical concerns. However, existing simul&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11678v1-abstract-full').style.display = 'inline'; document.getElementById('2502.11678v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11678v1-abstract-full" style="display: none;"> Metacognitive education plays a crucial role in cultivating students&#39; self-regulation and reflective thinking, providing essential support for those with learning difficulties through academic advising. Simulating students with insufficient learning capabilities using large language models offers a promising approach to refining pedagogical methods without ethical concerns. However, existing simulations often fail to authentically represent students&#39; learning struggles and face challenges in evaluation due to the lack of reliable metrics and ethical constraints in data collection. To address these issues, we propose a pipeline for automatically generating and filtering high-quality simulated student agents. 
Our approach leverages a two-round automated scoring system validated by human experts and employs a score propagation module to obtain more consistent scores across the student graph. Experimental results demonstrate that our pipeline efficiently identifies high-quality student agents, and we discuss the traits that influence the simulation&#39;s effectiveness. By simulating students with varying degrees of learning difficulties, our work paves the way for broader applications in personalized learning and educational assessment. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11678v1-abstract-full').style.display = 'none'; document.getElementById('2502.11678v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 8 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11656">arXiv:2502.11656</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.11656">pdf</a>, <a href="https://arxiv.org/format/2502.11656">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Databases">cs.DB</span> </div> </div> <p class="title is-5 mathjax"> Uncovering the Impact of Chain-of-Thought Reasoning for Direct Preference Optimization: Lessons from Text-to-SQL </p> <p class="authors"> <span class="search-hit">Authors:</span> <a 
href="/search/cs?searchtype=author&amp;query=Liu%2C+H">Hanbing Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Haoyang Li</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+X">Xiaokang Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+R">Ruotong Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+H">Haiyong Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Tian%2C+T">Tian Tian</a>, <a href="/search/cs?searchtype=author&amp;query=Qi%2C+Q">Qi Qi</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+J">Jing Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.11656v1-abstract-short" style="display: inline;"> Direct Preference Optimization (DPO) has proven effective in complex reasoning tasks like math word problems and code generation. However, when applied to Text-to-SQL datasets, it often fails to improve performance and can even degrade it. Our investigation reveals the root cause: unlike math and code tasks, which naturally integrate Chain-of-Thought (CoT) reasoning with DPO, Text-to-SQL datasets&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11656v1-abstract-full').style.display = 'inline'; document.getElementById('2502.11656v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11656v1-abstract-full" style="display: none;"> Direct Preference Optimization (DPO) has proven effective in complex reasoning tasks like math word problems and code generation. However, when applied to Text-to-SQL datasets, it often fails to improve performance and can even degrade it. 
Our investigation reveals the root cause: unlike math and code tasks, which naturally integrate Chain-of-Thought (CoT) reasoning with DPO, Text-to-SQL datasets typically include only final answers (gold SQL queries) without detailed CoT solutions. By augmenting Text-to-SQL datasets with synthetic CoT solutions, we achieve, for the first time, consistent and significant performance improvements using DPO. Our analysis shows that CoT reasoning is crucial for unlocking DPO&#39;s potential, as it mitigates reward hacking, strengthens discriminative capabilities, and improves scalability. These findings offer valuable insights for building more robust Text-to-SQL models. To support further research, we publicly release the code and CoT-enhanced datasets. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11656v1-abstract-full').style.display = 'none'; document.getElementById('2502.11656v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11519">arXiv:2502.11519</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.11519">pdf</a>, <a href="https://arxiv.org/format/2502.11519">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3696410.3714636">10.1145/3696410.3714636 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> UniGO: A Unified Graph Neural Network for Modeling Opinion Dynamics on Graphs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Hao Li</a>, <a href="/search/cs?searchtype=author&amp;query=Jiang%2C+H">Hao Jiang</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+Y">Yuke Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+H">Hao Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Gong%2C+W">Wenying Gong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.11519v1-abstract-short" style="display: inline;"> Polarization and fragmentation in social media amplify user biases, making it increasingly important to understand the evolution of opinions. Opinion dynamics provide interpretability for studying opinion evolution, yet incorporating these insights into predictive models remains challenging. 
This challenge arises due to the inherent complexity of the diversity of opinion fusion rules and the diffi&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11519v1-abstract-full').style.display = 'inline'; document.getElementById('2502.11519v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11519v1-abstract-full" style="display: none;"> Polarization and fragmentation in social media amplify user biases, making it increasingly important to understand the evolution of opinions. Opinion dynamics provide interpretability for studying opinion evolution, yet incorporating these insights into predictive models remains challenging. This challenge arises due to the inherent complexity of the diversity of opinion fusion rules and the difficulty in capturing equilibrium states while avoiding over-smoothing. This paper constructs a unified opinion dynamics model to integrate different opinion fusion rules and generates corresponding synthetic datasets. To fully leverage the advantages of unified opinion dynamics, we introduces UniGO, a framework for modeling opinion evolution on graphs. Using a coarsen-refine mechanism, UniGO efficiently models opinion dynamics through a graph neural network, mitigating over-smoothing while preserving equilibrium phenomena. UniGO leverages pretraining on synthetic datasets, which enhances its ability to generalize to real-world scenarios, providing a viable paradigm for applications of opinion dynamics. Experimental results on both synthetic and real-world datasets demonstrate UniGO&#39;s effectiveness in capturing complex opinion formation processes and predicting future evolution. The pretrained model also shows strong generalization capability, validating the benefits of using synthetic data to boost real-world performance. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11519v1-abstract-full').style.display = 'none'; document.getElementById('2502.11519v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">WWW2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11466">arXiv:2502.11466</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.11466">pdf</a>, <a href="https://arxiv.org/format/2502.11466">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> GiFT: Gibbs Fine-Tuning for Code Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Haochen Li</a>, <a href="/search/cs?searchtype=author&amp;query=Feng%2C+W">Wanjin Feng</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xin Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Shen%2C+Z">Zhiqi Shen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.11466v1-abstract-short" style="display: inline;"> Training Large Language 
Models (LLMs) with synthetic data is a prevalent practice in code generation. A key approach is self-training, where LLMs are iteratively trained on self-generated correct code snippets. In this case, the self-generated codes are drawn from a conditional distribution, conditioned on a specific seed description. However, the seed description is not the only valid representat&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11466v1-abstract-full').style.display = 'inline'; document.getElementById('2502.11466v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11466v1-abstract-full" style="display: none;"> Training Large Language Models (LLMs) with synthetic data is a prevalent practice in code generation. A key approach is self-training, where LLMs are iteratively trained on self-generated correct code snippets. In this case, the self-generated codes are drawn from a conditional distribution, conditioned on a specific seed description. However, the seed description is not the only valid representation that aligns with its intended meaning. With all valid descriptions and codes forming a joint space, codes drawn from the conditional distribution would lead to an underrepresentation of the full description-code space. As such, we propose Gibbs Fine-Tuning (GiFT), a novel self-training method inspired by Gibbs sampling. GiFT allows self-generated data to be drawn from the marginal distribution of the joint space, thereby mitigating the biases inherent in conditional sampling. We provide a theoretical analysis demonstrating the potential benefits of fine-tuning LLMs with code derived from the marginal distribution. Furthermore, we propose a perplexity-based code selection method to mitigate the imbalanced long-tail distribution of the self-generated codes. 
Empirical evaluation of two LLMs across four datasets demonstrates that GiFT achieves superior performance, particularly on more challenging benchmarks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11466v1-abstract-full').style.display = 'none'; document.getElementById('2502.11466v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> </ol>
<!--
  Result pagination. Page 1 (start=0) is the page being shown: it carries
  the is-current class, so it is the only link marked aria-current="page".
  Every other page link is labelled "Goto page N" and has no aria-current.
-->
<nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination">
  <!-- Kept in the markup but hidden through is-invisible. -->
  <a href="" class="pagination-previous is-invisible">Previous </a>
  <a href="/search/?searchtype=author&amp;query=Li%2C+H&amp;start=50" class="pagination-next">Next </a>
  <ul class="pagination-list">
    <li>
      <a href="/search/?searchtype=author&amp;query=Li%2C+H&amp;start=0" class="pagination-link is-current" aria-label="Page 1" aria-current="page">1 </a>
    </li>
    <li>
      <a href="/search/?searchtype=author&amp;query=Li%2C+H&amp;start=50" class="pagination-link" aria-label="Goto page 2">2 </a>
    </li>
    <li>
      <a href="/search/?searchtype=author&amp;query=Li%2C+H&amp;start=100" class="pagination-link" aria-label="Goto page 3">3 </a>
    </li>
    <li>
      <a href="/search/?searchtype=author&amp;query=Li%2C+H&amp;start=150" class="pagination-link" aria-label="Goto page 4">4 </a>
    </li>
    <li>
      <a href="/search/?searchtype=author&amp;query=Li%2C+H&amp;start=200" class="pagination-link" aria-label="Goto page 5">5 </a>
    </li>
    <li><span class="pagination-ellipsis">&hellip;</span></li>
  </ul>
</nav>
<div class="is-hidden-tablet">
  <!-- feedback for mobile only -->
  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span>
</div>
</div>
</main>
<!--
  Site footer: two "MetaColumn" groups of secondary navigation links
  (about/help/contact/subscribe, then copyright/privacy/accessibility/status).
  The inline SVG icons sit beside visible link text, so they are hidden from
  assistive technology with aria-hidden. Links that open a new tab carry
  rel="noopener" so the opened page gets no window.opener handle.
-->
<footer>
  <div class="columns is-desktop" role="navigation" aria-label="Secondary">
    <!-- MetaColumn 1 -->
    <div class="column">
      <div class="columns">
        <div class="column">
          <ul class="nav-spaced">
            <li><a href="https://info.arxiv.org/about">About</a></li>
            <li><a href="https://info.arxiv.org/help">Help</a></li>
          </ul>
        </div>
        <div class="column">
          <ul class="nav-spaced">
            <li>
              <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation" aria-hidden="true"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>
              <a href="https://info.arxiv.org/help/contact.html"> Contact</a>
            </li>
            <li>
              <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation" aria-hidden="true"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg>
              <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a>
            </li>
          </ul>
        </div>
      </div>
    </div>
    <!-- end MetaColumn 1 -->
    <!-- MetaColumn 2 -->
    <div class="column">
      <div class="columns">
        <div class="column">
          <ul class="nav-spaced">
            <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li>
            <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li>
          </ul>
        </div>
        <div class="column sorry-app-links">
          <ul class="nav-spaced">
            <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li>
            <li>
              <!-- No whitespace is introduced between each icon and its link text below, matching the original inline layout. -->
              <p class="help">
                <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank" rel="noopener">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation" aria-hidden="true"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br>
                Get status notifications via
                <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank" rel="noopener"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation" aria-hidden="true"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a>
                or
                <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank" rel="noopener"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation" aria-hidden="true"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a>
              </p>
            </li>
          </ul>
        </div>
      </div>
    </div>
    <!-- end MetaColumn 2 -->
  </div>
</footer>
<script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script>
</body>
</html>

Pages: 1 2 3 4 5 6 7 8 9 10