Search | arXiv e-print repository
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 5,302 results for author: <span class="mathjax">Li, Z</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Li%2C+Z">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Li, Z"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Li%2C+Z&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Li, Z"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Li%2C+Z&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Li%2C+Z&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Li%2C+Z&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Li%2C+Z&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Li%2C+Z&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Li%2C+Z&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">…</span></li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.14295">arXiv:2411.14295</a> <span> [<a href="https://arxiv.org/pdf/2411.14295">pdf</a>, <a href="https://arxiv.org/format/2411.14295">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> StereoCrafter-Zero: Zero-Shot Stereo Video Generation with Noisy Restart </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shi%2C+J">Jian Shi</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Q">Qian Wang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zhenyu Li</a>, <a href="/search/cs?searchtype=author&query=Wonka%2C+P">Peter Wonka</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.14295v1-abstract-short" style="display: inline;"> Generating high-quality stereo videos that mimic human binocular vision requires maintaining consistent depth perception and temporal coherence across frames. While diffusion models have advanced image and video synthesis, generating high-quality stereo videos remains challenging due to the difficulty of maintaining consistent temporal and spatial coherence between left and right views. 
We introdu… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14295v1-abstract-full').style.display = 'inline'; document.getElementById('2411.14295v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.14295v1-abstract-full" style="display: none;"> Generating high-quality stereo videos that mimic human binocular vision requires maintaining consistent depth perception and temporal coherence across frames. While diffusion models have advanced image and video synthesis, generating high-quality stereo videos remains challenging due to the difficulty of maintaining consistent temporal and spatial coherence between left and right views. We introduce \textit{StereoCrafter-Zero}, a novel framework for zero-shot stereo video generation that leverages video diffusion priors without the need for paired training data. Key innovations include a noisy restart strategy to initialize stereo-aware latents and an iterative refinement process that progressively harmonizes the latent space, addressing issues like temporal flickering and view inconsistencies. Comprehensive evaluations, including quantitative metrics and user studies, demonstrate that \textit{StereoCrafter-Zero} produces high-quality stereo videos with improved depth consistency and temporal smoothness, even when depth estimations are imperfect. Our framework is robust and adaptable across various diffusion models, setting a new benchmark for zero-shot stereo video generation and enabling more immersive visual experiences. Our code can be found in~\url{https://github.com/shijianjian/StereoCrafter-Zero}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14295v1-abstract-full').style.display = 'none'; document.getElementById('2411.14295v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.13914">arXiv:2411.13914</a> <span> [<a href="https://arxiv.org/pdf/2411.13914">pdf</a>, <a href="https://arxiv.org/format/2411.13914">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> ICODE: Modeling Dynamical Systems with Extrinsic Input Information </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zhaoyi Li</a>, <a href="/search/cs?searchtype=author&query=Mei%2C+W">Wenjie Mei</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+K">Ke Yu</a>, <a href="/search/cs?searchtype=author&query=Bai%2C+Y">Yang Bai</a>, <a href="/search/cs?searchtype=author&query=Li%2C+S">Shihua Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.13914v1-abstract-short" style="display: inline;"> Learning models of dynamical systems with external inputs, that may be, for example, nonsmooth or piecewise, is crucial for studying complex phenomena and predicting future state evolution, which is essential for applications such as safety guarantees and decision-making. In this work, we introduce \emph{Input Concomitant Neural ODEs (ICODEs)}, which incorporate precise real-time input information… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13914v1-abstract-full').style.display = 'inline'; document.getElementById('2411.13914v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.13914v1-abstract-full" style="display: none;"> Learning models of dynamical systems with external inputs, that may be, for example, nonsmooth or piecewise, is crucial for studying complex phenomena and predicting future state evolution, which is essential for applications such as safety guarantees and decision-making. In this work, we introduce \emph{Input Concomitant Neural ODEs (ICODEs)}, which incorporate precise real-time input information into the learning process of the models, rather than treating the inputs as hidden parameters to be learned. The sufficient conditions to ensure the model's contraction property are provided to guarantee that system trajectories of the trained model converge to a fixed point, regardless of initial conditions across different training processes. We validate our method through experiments on several representative real dynamics: Single-link robot, DC-to-DC converter, motion dynamics of a rigid body, Rabinovich-Fabrikant equation, Glycolytic-glycogenolytic pathway model, and heat conduction equation. The experimental results demonstrate that our proposed ICODEs efficiently learn the ground truth systems, achieving superior prediction performance under both typical and atypical inputs. This work offers a valuable class of neural ODE models for understanding physical systems with explicit external input information, with potential promising applications in fields such as physics and robotics. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13914v1-abstract-full').style.display = 'none'; document.getElementById('2411.13914v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.13807">arXiv:2411.13807</a> <span> [<a href="https://arxiv.org/pdf/2411.13807">pdf</a>, <a href="https://arxiv.org/format/2411.13807">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> MagicDriveDiT: High-Resolution Long Video Generation for Autonomous Driving with Adaptive Control </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gao%2C+R">Ruiyuan Gao</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Kai Chen</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+B">Bo Xiao</a>, <a href="/search/cs?searchtype=author&query=Hong%2C+L">Lanqing Hong</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zhenguo Li</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Q">Qiang Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.13807v1-abstract-short" style="display: inline;"> The rapid advancement of diffusion models has greatly improved video synthesis, especially in controllable video generation, which is essential for applications like autonomous driving. However, existing methods are limited by scalability and how control conditions are integrated, failing to meet the needs for high-resolution and long videos for autonomous driving applications. In this paper, we i… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13807v1-abstract-full').style.display = 'inline'; document.getElementById('2411.13807v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.13807v1-abstract-full" style="display: none;"> The rapid advancement of diffusion models has greatly improved video synthesis, especially in controllable video generation, which is essential for applications like autonomous driving. However, existing methods are limited by scalability and how control conditions are integrated, failing to meet the needs for high-resolution and long videos for autonomous driving applications. In this paper, we introduce MagicDriveDiT, a novel approach based on the DiT architecture, and tackle these challenges. Our method enhances scalability through flow matching and employs a progressive training strategy to manage complex scenarios. By incorporating spatial-temporal conditional encoding, MagicDriveDiT achieves precise control over spatial-temporal latents. Comprehensive experiments show its superior performance in generating realistic street scene videos with higher resolution and more frames. 
MagicDriveDiT significantly improves video generation quality and spatial-temporal controls, expanding its potential applications across various tasks in autonomous driving. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13807v1-abstract-full').style.display = 'none'; document.getElementById('2411.13807v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Project Website: https://flymin.github.io/magicdrivedit/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.13602">arXiv:2411.13602</a> <span> [<a href="https://arxiv.org/pdf/2411.13602">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Large-scale cross-modality pretrained model enhances cardiovascular state estimation and cardiomyopathy detection from electrocardiograms: An AI system development and multi-center validation study </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ding%2C+Z">Zhengyao Ding</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+Y">Yujian Hu</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Y">Youyao Xu</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+C">Chengchen Zhao</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Ziyu Li</a>, <a href="/search/cs?searchtype=author&query=Mao%2C+Y">Yiheng Mao</a>, <a href="/search/cs?searchtype=author&query=Li%2C+H">Haitao Li</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Q">Qian Li</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jing Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yue Chen</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+M">Mengjia Chen</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+L">Longbo Wang</a>, <a href="/search/cs?searchtype=author&query=Chu%2C+X">Xuesen Chu</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+W">Weichao Pan</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Ziyi Liu</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+F">Fei Wu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Hongkun Zhang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+T">Ting Chen</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Z">Zhengxing Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.13602v1-abstract-short" style="display: inline;"> Cardiovascular diseases (CVDs) present significant challenges for early and accurate diagnosis. 
   Abstract: Cardiovascular diseases (CVDs) present significant challenges for early and accurate diagnosis. While cardiac magnetic resonance imaging (CMR) is the gold standard for assessing cardiac function and diagnosing CVDs, its high cost and technical complexity limit accessibility. In contrast, electrocardiography (ECG) offers promise for large-scale early screening. This study introduces CardiacNets, an innovative model that enhances ECG analysis by leveraging the diagnostic strengths of CMR through cross-modal contrastive learning and generative pretraining. CardiacNets serves two primary functions: (1) it evaluates detailed cardiac function indicators and screens for potential CVDs, including coronary artery disease, cardiomyopathy, pericarditis, heart failure and pulmonary hypertension, using ECG input; and (2) it enhances interpretability by generating high-quality CMR images from ECG data. We train and validate the proposed CardiacNets on two large-scale public datasets (the UK Biobank with 41,519 individuals and the MIMIC-IV-ECG comprising 501,172 samples) as well as three private datasets (FAHZU with 410 individuals, SAHZU with 464 individuals, and QPH with 338 individuals), and the findings demonstrate that CardiacNets consistently outperforms traditional ECG-only models, substantially improving screening accuracy. Furthermore, the generated CMR images provide valuable diagnostic support for physicians of all experience levels. This proof-of-concept study highlights how ECG can facilitate cross-modal insights into cardiac function assessment, paving the way for enhanced CVD screening and diagnosis at a population level.
   Submitted 19 November, 2024; originally announced November 2024.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">23 pages, 8 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.13553">arXiv:2411.13553</a> <span> [<a href="https://arxiv.org/pdf/2411.13553">pdf</a>, <a href="https://arxiv.org/format/2411.13553">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> AI-generated Image Detection: Passive or Watermark? </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Guo%2C+M">Moyang Guo</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+Y">Yuepeng Hu</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Z">Zhengyuan Jiang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zeyu Li</a>, <a href="/search/cs?searchtype=author&query=Sadovnik%2C+A">Amir Sadovnik</a>, <a href="/search/cs?searchtype=author&query=Daw%2C+A">Arka Daw</a>, <a href="/search/cs?searchtype=author&query=Gong%2C+N">Neil Gong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.13553v1-abstract-short" style="display: inline;"> While text-to-image models offer numerous benefits, they also pose significant societal risks. Detecting AI-generated images is crucial for mitigating these risks. Detection methods can be broadly categorized into passive and watermark-based approaches: passive detectors rely on artifacts present in AI-generated images, whereas watermark-based detectors proactively embed watermarks into such image… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13553v1-abstract-full').style.display = 'inline'; document.getElementById('2411.13553v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.13553v1-abstract-full" style="display: none;"> While text-to-image models offer numerous benefits, they also pose significant societal risks. Detecting AI-generated images is crucial for mitigating these risks. Detection methods can be broadly categorized into passive and watermark-based approaches: passive detectors rely on artifacts present in AI-generated images, whereas watermark-based detectors proactively embed watermarks into such images. A key question is which type of detector performs better in terms of effectiveness, robustness, and efficiency. However, the current literature lacks a comprehensive understanding of this issue. In this work, we aim to bridge that gap by developing ImageDetectBench, the first comprehensive benchmark to compare the effectiveness, robustness, and efficiency of passive and watermark-based detectors. Our benchmark includes four datasets, each containing a mix of AI-generated and non-AI-generated images. We evaluate five passive detectors and four watermark-based detectors against eight types of common perturbations and three types of adversarial perturbations. 
Our benchmark results reveal several interesting findings. For instance, watermark-based detectors consistently outperform passive detectors, both in the presence and absence of perturbations. Based on these insights, we provide recommendations for detecting AI-generated images, e.g., when both types of detectors are applicable, watermark-based detectors should be the preferred choice. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13553v1-abstract-full').style.display = 'none'; document.getElementById('2411.13553v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.13154">arXiv:2411.13154</a> <span> [<a href="https://arxiv.org/pdf/2411.13154">pdf</a>, <a href="https://arxiv.org/format/2411.13154">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> DMQR-RAG: Diverse Multi-Query Rewriting for RAG </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zhicong Li</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jiahao Wang</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Z">Zhishu Jiang</a>, <a href="/search/cs?searchtype=author&query=Mao%2C+H">Hangyu Mao</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Z">Zhongxia Chen</a>, <a href="/search/cs?searchtype=author&query=Du%2C+J">Jiazhen Du</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yuanxing Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+F">Fuzheng Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+D">Di Zhang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yong Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.13154v1-abstract-short" style="display: inline;"> Large language models often encounter challenges with static knowledge and hallucinations, which undermine their reliability. Retrieval-augmented generation (RAG) mitigates these issues by incorporating external information. However, user queries frequently contain noise and intent deviations, necessitating query rewriting to improve the relevance of retrieved documents. In this paper, we introduc… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13154v1-abstract-full').style.display = 'inline'; document.getElementById('2411.13154v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.13154v1-abstract-full" style="display: none;"> Large language models often encounter challenges with static knowledge and hallucinations, which undermine their reliability. Retrieval-augmented generation (RAG) mitigates these issues by incorporating external information. 
However, user queries frequently contain noise and intent deviations, necessitating query rewriting to improve the relevance of retrieved documents. In this paper, we introduce DMQR-RAG, a Diverse Multi-Query Rewriting framework designed to improve the performance of both document retrieval and final responses in RAG. Specifically, we investigate how queries with varying information quantities can retrieve a diverse array of documents, presenting four rewriting strategies that operate at different levels of information to enhance the performance of baseline approaches. Additionally, we propose an adaptive strategy selection method that minimizes the number of rewrites while optimizing overall performance. Our methods have been rigorously validated through extensive experiments conducted in both academic and industry settings. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13154v1-abstract-full').style.display = 'none'; document.getElementById('2411.13154v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.12995">arXiv:2411.12995</a> <span> [<a href="https://arxiv.org/pdf/2411.12995">pdf</a>, <a href="https://arxiv.org/format/2411.12995">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> </div> </div> <p class="title is-5 mathjax"> Eliminating Ratio Bias for Gradient-based Simulated Parameter Estimation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zehao Li</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+Y">Yijie Peng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.12995v1-abstract-short" style="display: inline;"> This article addresses the challenge of parameter calibration in stochastic models where the likelihood function is not analytically available. We propose a gradient-based simulated parameter estimation framework, leveraging a multi-time scale algorithm that tackles the issue of ratio bias in both maximum likelihood estimation and posterior density estimation problems. Additionally, we introduce a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12995v1-abstract-full').style.display = 'inline'; document.getElementById('2411.12995v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.12995v1-abstract-full" style="display: none;"> This article addresses the challenge of parameter calibration in stochastic models where the likelihood function is not analytically available. 
We propose a gradient-based simulated parameter estimation framework, leveraging a multi-time scale algorithm that tackles the issue of ratio bias in both maximum likelihood estimation and posterior density estimation problems. Additionally, we introduce a nested simulation optimization structure, providing theoretical analyses including strong convergence, asymptotic normality, convergence rate, and budget allocation strategies for the proposed algorithm. The framework is further extended to neural network training, offering a novel perspective on stochastic approximation in machine learning. Numerical experiments show that our algorithm can improve the estimation accuracy and save computational costs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12995v1-abstract-full').style.display = 'none'; document.getElementById('2411.12995v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.12773">arXiv:2411.12773</a> <span> [<a href="https://arxiv.org/pdf/2411.12773">pdf</a>, <a href="https://arxiv.org/format/2411.12773">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Decoupling Training-Free Guided Diffusion by ADMM </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Youyuan Zhang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zehua Liu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zenan Li</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zhaoyu Li</a>, <a href="/search/cs?searchtype=author&query=Clark%2C+J+J">James J. Clark</a>, <a href="/search/cs?searchtype=author&query=Si%2C+X">Xujie Si</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.12773v1-abstract-short" style="display: inline;"> In this paper, we consider the conditional generation problem by guiding off-the-shelf unconditional diffusion models with differentiable loss functions in a plug-and-play fashion. While previous research has primarily focused on balancing the unconditional diffusion model and the guided loss through a tuned weight hyperparameter, we propose a novel framework that distinctly decouples these two co… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12773v1-abstract-full').style.display = 'inline'; document.getElementById('2411.12773v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.12773v1-abstract-full" style="display: none;"> In this paper, we consider the conditional generation problem by guiding off-the-shelf unconditional diffusion models with differentiable loss functions in a plug-and-play fashion. 
While previous research has primarily focused on balancing the unconditional diffusion model and the guided loss through a tuned weight hyperparameter, we propose a novel framework that distinctly decouples these two components. Specifically, we introduce two variables ${x}$ and ${z}$, to represent the generated samples governed by the unconditional generation model and the guidance function, respectively. This decoupling reformulates conditional generation into two manageable subproblems, unified by the constraint ${x} = {z}$. Leveraging this setup, we develop a new algorithm based on the Alternating Direction Method of Multipliers (ADMM) to adaptively balance these components. Additionally, we establish the equivalence between the diffusion reverse step and the proximal operator of ADMM and provide a detailed convergence analysis of our algorithm under certain mild assumptions. Our experiments demonstrate that our proposed method ADMMDiff consistently generates high-quality samples while ensuring strong adherence to the conditioning criteria. It outperforms existing methods across a range of conditional generation tasks, including image generation with various guidance and controllable motion synthesis. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12773v1-abstract-full').style.display = 'none'; document.getElementById('2411.12773v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.12363">arXiv:2411.12363</a> <span> [<a href="https://arxiv.org/pdf/2411.12363">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> DGSNA: prompt-based Dynamic Generative Scene-based Noise Addition method </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+Z">Zihao Chen</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+Z">Zhentao Lin</a>, <a href="/search/cs?searchtype=author&query=Zeng%2C+B">Bi Zeng</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+L">Linyi Huang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zhi Li</a>, <a href="/search/cs?searchtype=author&query=Cai%2C+J">Jia Cai</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.12363v1-abstract-short" style="display: inline;"> This paper addresses the challenges of accurately enumerating and describing scenes and the labor-intensive process required to replicate acoustic environments using non-generative methods. 
   Abstract: This paper addresses the challenges of accurately enumerating and describing scenes and the labor-intensive process required to replicate acoustic environments using non-generative methods. We introduce the prompt-based Dynamic Generative Scene-based Noise Addition method (DGSNA), which innovatively combines the Dynamic Generation of Scene Information (DGSI) with Scene-based Noise Addition for Audio (SNAA). Employing generative chat models structured within the Background-Examples-Task (BET) prompt framework, the DGSI component facilitates the dynamic synthesis of tailored Scene Information (SI) for specific acoustic environments. Additionally, the SNAA component leverages Room Impulse Response (RIR) filters and Text-To-Audio (TTA) systems to generate realistic, scene-based noise that can be adapted for both indoor and outdoor environments. Through comprehensive experiments, the adaptability of DGSNA across different generative chat models was demonstrated. The results, assessed through both objective and subjective evaluations, show that DGSNA provides robust performance in dynamically generating precise SI and effectively enhancing scene-based noise addition capabilities, thus offering significant improvements over traditional methods in acoustic scene simulation. Our implementation and demos are available at https://dgsna.github.io.
   Submitted 19 November, 2024; originally announced November 2024.
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.12184">arXiv:2411.12184</a> <span> [<a href="https://arxiv.org/pdf/2411.12184">pdf</a>, <a href="https://arxiv.org/ps/2411.12184">ps</a>, <a href="https://arxiv.org/format/2411.12184">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Methodology">stat.ME</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Testability of Instrumental Variables in Additive Nonlinear, Non-Constant Effects Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Guo%2C+X">Xichen Guo</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zheng Li</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+B">Biwei Huang</a>, <a href="/search/cs?searchtype=author&query=Zeng%2C+Y">Yan Zeng</a>, <a href="/search/cs?searchtype=author&query=Geng%2C+Z">Zhi Geng</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+F">Feng Xie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.12184v1-abstract-short" style="display: inline;"> We address the issue of the testability of instrumental variables derived from observational data. Most existing testable implications are centered on scenarios where the treatment is a discrete variable, e.g., instrumental inequality (Pearl, 1995), or where the effect is assumed to be constant, e.g., instrumental variables condition based on the principle of independent mechanisms (Burauel, 2023)… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12184v1-abstract-full').style.display = 'inline'; document.getElementById('2411.12184v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.12184v1-abstract-full" style="display: none;"> We address the issue of the testability of instrumental variables derived from observational data. Most existing testable implications are centered on scenarios where the treatment is a discrete variable, e.g., instrumental inequality (Pearl, 1995), or where the effect is assumed to be constant, e.g., instrumental variables condition based on the principle of independent mechanisms (Burauel, 2023). However, treatments can often be continuous variables, such as drug dosages or nutritional content levels, and non-constant effects may occur in many real-world scenarios. In this paper, we consider an additive nonlinear, non-constant effects model with unmeasured confounders, in which treatments can be either discrete or continuous, and propose an Auxiliary-based Independence Test (AIT) condition to test whether a variable is a valid instrument. We first show that if the candidate instrument is valid, then the AIT condition holds. Moreover, we illustrate the implications of the AIT condition and demonstrate that, in certain conditions, AIT conditions are necessary and sufficient to detect all invalid IVs. We also extend the AIT condition to include covariates and introduce a practical testing algorithm. 
Experimental results on both synthetic and three different real-world datasets show the effectiveness of our proposed condition. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12184v1-abstract-full').style.display = 'none'; document.getElementById('2411.12184v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.11852">arXiv:2411.11852</a> <span> [<a href="https://arxiv.org/pdf/2411.11852">pdf</a>, <a href="https://arxiv.org/format/2411.11852">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3658617.3697687">10.1145/3658617.3697687 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> LUTMUL: Exceed Conventional FPGA Roofline Limit by LUT-based Efficient Multiplication for Neural Network Inference </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xie%2C+Y">Yanyue Xie</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zhengang Li</a>, <a href="/search/cs?searchtype=author&query=Diaconu%2C+D">Dana Diaconu</a>, <a href="/search/cs?searchtype=author&query=Handagala%2C+S">Suranga Handagala</a>, <a href="/search/cs?searchtype=author&query=Leeser%2C+M">Miriam Leeser</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+X">Xue Lin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.11852v1-abstract-short" style="display: inline;"> For FPGA-based neural network accelerators, digital signal processing (DSP) blocks have traditionally been the cornerstone for handling multiplications. This paper introduces LUTMUL, which harnesses the potential of look-up tables (LUTs) for performing multiplications. The availability of LUTs typically outnumbers that of DSPs by a factor of 100, offering a significant computational advantage. By… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11852v1-abstract-full').style.display = 'inline'; document.getElementById('2411.11852v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.11852v1-abstract-full" style="display: none;"> For FPGA-based neural network accelerators, digital signal processing (DSP) blocks have traditionally been the cornerstone for handling multiplications. This paper introduces LUTMUL, which harnesses the potential of look-up tables (LUTs) for performing multiplications. 
The availability of LUTs typically outnumbers that of DSPs by a factor of 100, offering a significant computational advantage. By exploiting this advantage of LUTs, our method demonstrates a potential boost in the performance of FPGA-based neural network accelerators with a reconfigurable dataflow architecture. Our approach challenges the conventional peak performance on DSP-based accelerators and sets a new benchmark for efficient neural network inference on FPGAs. Experimental results demonstrate that our design achieves the best inference speed among all FPGA-based accelerators, achieving a throughput of 1627 images per second and maintaining a top-1 accuracy of 70.95% on the ImageNet dataset. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11852v1-abstract-full').style.display = 'none'; document.getElementById('2411.11852v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ASPDAC 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.10960">arXiv:2411.10960</a> <span> [<a href="https://arxiv.org/pdf/2411.10960">pdf</a>, <a href="https://arxiv.org/format/2411.10960">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> Beamforming Design and Multi-User Scheduling in Transmissive RIS Enabled Distributed Cooperative ISAC Networks with RSMA </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Ziwei Liu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+W">Wen Chen</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Q">Qingqing Wu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zhendong Li</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Q">Qiong Wu</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+N">Nan Cheng</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Jun Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.10960v1-abstract-short" style="display: inline;"> In this paper, we propose a novel transmissive reconfigurable intelligent surface (TRIS) transceiver-empowered distributed cooperative integrated sensing and communication (ISAC) network to enhance coverage as well as to enhance wireless environment understanding. 
arXiv:2411.10960 (https://arxiv.org/abs/2411.10960)
Subjects: Information Theory (cs.IT)
Title: Beamforming Design and Multi-User Scheduling in Transmissive RIS Enabled Distributed Cooperative ISAC Networks with RSMA
Authors: Ziwei Liu, Wen Chen, Qingqing Wu, Zhendong Li, Qiong Wu, Nan Cheng, Jun Li
Abstract: In this paper, we propose a novel transmissive reconfigurable intelligent surface (TRIS) transceiver-empowered distributed cooperative integrated sensing and communication (ISAC) network to enhance coverage as well as wireless environment understanding. Based on the network requirements, the users are categorized into cooperative users (CUEs) and destination users (DUEs), and the CUEs utilize their own resources to serve the DUEs. To realize cooperation, we implement rate-splitting multiple access (RSMA) at the base station (BS), where the common stream is decoded and re-encoded at the CUEs and forwarded to the DUEs, while the private stream satisfies the CUEs' own communication requirements. We construct an optimization problem with maximum minimum radar mutual information (RMI) as the objective function to optimize the BS beamforming matrix, the CUE beamforming matrices, the common stream rate vectors, and the user scheduling vectors. Due to the coupling of the optimization variables and the non-convex operations involved, the problem is non-convex and cannot be solved directly. To address these challenges, we adopt a consensus alternating direction method of multipliers (ADMM) framework to decouple the optimization variables and solve it. Specifically, the problem is decoupled into multiple subproblems that are solved independently by iterative optimization until overall convergence is achieved. Finally, numerical results validate the superiority of the proposed scheme in terms of improving communication sum-rate and RMI while greatly reducing algorithm complexity.
Submitted: 16 November 2024; originally announced November 2024.
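The consensus-ADMM pattern used here to decouple coupled variables can be shown on a toy problem. The sketch below minimizes a sum of scalar quadratics by giving each agent a local copy, a shared consensus variable, and dual updates; the data, the penalty parameter, and the closed-form local step are illustrative assumptions and do not represent the actual RMI beamforming objective.

    import numpy as np

    # Toy consensus ADMM: minimize sum_i 0.5*(x - a_i)^2 by keeping a local copy
    # x_i per agent, forcing x_i = z, and alternating local/consensus/dual updates.
    # Illustrates only the decomposition pattern, not the paper's beamforming problem.
    a = np.array([1.0, 4.0, 7.0])   # per-agent data (assumed)
    rho = 1.0                       # ADMM penalty parameter
    x = np.zeros_like(a)            # local variables
    z = 0.0                         # consensus variable
    u = np.zeros_like(a)            # scaled dual variables

    for _ in range(50):
        # Local step: argmin_x 0.5*(x - a_i)^2 + (rho/2)*(x - z + u_i)^2 (closed form)
        x = (a + rho * (z - u)) / (1.0 + rho)
        # Consensus step: agree on the average of locals plus duals
        z = np.mean(x + u)
        # Dual ascent on the constraint x_i = z
        u = u + x - z

    print(round(z, 3), a.mean())    # both are 4.0, the global minimizer

Each subproblem here has a closed-form solution; in the paper's setting the same skeleton would instead call per-block solvers for the beamforming and scheduling variables.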
arXiv:2411.10830 (https://arxiv.org/abs/2411.10830)
Subjects: Machine Learning (cs.LG); Artificial Intelligence (cs.AI); Optimization and Control (math.OC)
Title: One-Layer Transformer Provably Learns One-Nearest Neighbor In Context
Authors: Zihao Li, Yuan Cao, Cheng Gao, Yihan He, Han Liu, Jason M. Klusowski, Jianqing Fan, Mengdi Wang
Abstract: Transformers have achieved great success in recent years. Interestingly, transformers have shown particularly strong in-context learning capability -- even without fine-tuning, they are still able to solve unseen tasks well purely based on task-specific prompts. In this paper, we study the capability of one-layer transformers in learning one of the most classical nonparametric estimators, the one-nearest neighbor prediction rule. Under a theoretical framework where the prompt contains a sequence of labeled training data and unlabeled test data, we show that, although the loss function is nonconvex when trained with gradient descent, a single softmax attention layer can successfully learn to behave like a one-nearest neighbor classifier. Our result gives a concrete example of how transformers can be trained to implement nonparametric machine learning algorithms, and sheds light on the role of softmax attention in transformer models.
Submitted: 16 November 2024; originally announced November 2024.
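The headline claim is easy to check numerically: if a softmax attention head scores prompt examples by negative squared distance to the query and the inverse temperature is large, the softmax mass collapses onto the closest labeled example, so the readout is a one-nearest-neighbor prediction. The sketch below is a hand-constructed illustration under those assumptions, not the trained one-layer transformer analyzed in the paper.

    import numpy as np

    # Softmax attention over in-context examples that behaves like 1-nearest-neighbor
    # when the inverse temperature beta is large. Hand-set construction for intuition;
    # the paper studies what gradient descent actually learns, not this shortcut.
    def attention_predict(x_train, y_train, x_query, beta=50.0):
        scores = -beta * np.sum((x_train - x_query) ** 2, axis=1)   # similarity = -distance
        weights = np.exp(scores - scores.max())
        weights /= weights.sum()
        return float(weights @ y_train)            # attention readout = weighted label average

    x_train = np.array([[0.0, 0.0], [1.0, 1.0], [2.0, 0.0]])
    y_train = np.array([0.0, 1.0, 0.0])
    print(attention_predict(x_train, y_train, np.array([0.9, 1.1])))   # ~1.0, the nearest label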
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10830v1-abstract-full').style.display = 'none'; document.getElementById('2411.10830v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.10720">arXiv:2411.10720</a> <span> [<a href="https://arxiv.org/pdf/2411.10720">pdf</a>, <a href="https://arxiv.org/format/2411.10720">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neurons and Cognition">q-bio.NC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> Multi Scale Graph Neural Network for Alzheimer's Disease </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chauhan%2C+A">Anya Chauhan</a>, <a href="/search/cs?searchtype=author&query=Noori%2C+A">Ayush Noori</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zhaozhi Li</a>, <a href="/search/cs?searchtype=author&query=He%2C+Y">Yingnan He</a>, <a href="/search/cs?searchtype=author&query=Li%2C+M+M">Michelle M Li</a>, <a href="/search/cs?searchtype=author&query=Zitnik%2C+M">Marinka Zitnik</a>, <a href="/search/cs?searchtype=author&query=Das%2C+S">Sudeshna Das</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.10720v1-abstract-short" style="display: inline;"> Alzheimer's disease (AD) is a complex, progressive neurodegenerative disorder characterized by extracellular A\b{eta} plaques, neurofibrillary tau tangles, glial activation, and neuronal degeneration, involving multiple cell types and pathways. Current models often overlook the cellular context of these pathways. To address this, we developed a multiscale graph neural network (GNN) model, ALZ PINN… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10720v1-abstract-full').style.display = 'inline'; document.getElementById('2411.10720v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.10720v1-abstract-full" style="display: none;"> Alzheimer's disease (AD) is a complex, progressive neurodegenerative disorder characterized by extracellular A\b{eta} plaques, neurofibrillary tau tangles, glial activation, and neuronal degeneration, involving multiple cell types and pathways. Current models often overlook the cellular context of these pathways. To address this, we developed a multiscale graph neural network (GNN) model, ALZ PINNACLE, using brain omics data from donors spanning the entire aging to AD spectrum. ALZ PINNACLE is based on the PINNACLE GNN framework, which learns context-aware protein, cell type, and tissue representations within a unified latent space. ALZ PINNACLE was trained on 14,951 proteins, 206,850 protein interactions, 7 cell types, and 48 cell subtypes or states. 
After pretraining, we investigated the learned embedding of APOE, the largest genetic risk factor for AD, across different cell types. Notably, APOE embeddings showed high similarity in microglial, neuronal, and CD8 cells, suggesting a similar role of APOE in these cell types. Fine-tuning the model on AD risk genes revealed cell type contexts predictive of the role of APOE in AD. Our results suggest that ALZ PINNACLE may provide a valuable framework for uncovering novel insights into AD neurobiology.
Submitted: 16 November 2024; originally announced November 2024.
Comments: Findings paper presented at Machine Learning for Health (ML4H) symposium 2024, December 15-16, 2024, Vancouver, Canada, 9 pages

arXiv:2411.10461 (https://arxiv.org/abs/2411.10461)
Subjects: Human-Computer Interaction (cs.HC); Artificial Intelligence (cs.AI)
Title: Utilizing Human Behavior Modeling to Manipulate Explanations in AI-Assisted Decision Making: The Good, the Bad, and the Scary
Authors: Zhuoyan Li, Ming Yin
Abstract: Recent advances in AI models have increased the integration of AI-based decision aids into the human decision making process. To fully unlock the potential of AI-assisted decision making, researchers have computationally modeled how humans incorporate AI recommendations into their final decisions, and utilized these models to improve human-AI team performance.
Meanwhile, due to the "black-box" nature of AI models, providing AI explanations to human decision makers to help them rely on AI recommendations more appropriately has become a common practice. In this paper, we explore whether we can quantitatively model how humans integrate both AI recommendations and explanations into their decision process, and whether this quantitative understanding of human behavior from the learned model can be utilized to manipulate AI explanations, thereby nudging individuals towards making targeted decisions. Our extensive human experiments across various tasks demonstrate that human behavior can be easily influenced by these manipulated explanations towards targeted outcomes, regardless of whether the intent is adversarial or benign. Furthermore, individuals often fail to detect any anomalies in these explanations, despite their decisions being affected by them.
Submitted: 2 November 2024; originally announced November 2024.
Comments: NeurIPS 2024

arXiv:2411.10232 (https://arxiv.org/abs/2411.10232)
Subjects: Computer Vision and Pattern Recognition (cs.CV); Artificial Intelligence (cs.AI)
Title: ColorEdit: Training-free Image-Guided Color editing with diffusion model
Authors: Xingxi Yin, Zhi Li, Jingfeng Zhang, Chenglin Li, Yin Zhang
Abstract: Text-to-image (T2I) diffusion models, with their impressive generative capabilities, have been adopted for image editing tasks, demonstrating remarkable efficacy.
However, due to attention leakage and collision between the cross-attention map of the object and the new color attribute from the text prompt, text-guided image editing methods may fail to change the color of an object, resulting in a misalignment between the resulting image and the text prompt. In this paper, we conduct an in-depth analysis of the text-guided image synthesis process and of what semantic information different cross-attention blocks have learned. We observe that the visual representation of an object is determined in the up-block of the diffusion model in the early stage of the denoising process, and that color adjustment can be achieved through value matrices alignment in the cross-attention layer. Based on our findings, we propose a straightforward yet stable and effective image-guided method to modify the color of an object without requiring any additional fine-tuning or training. Lastly, we present a benchmark dataset called COLORBENCH, the first benchmark to evaluate the performance of color change methods. Extensive experiments validate the effectiveness of our method in object-level color editing and show that it surpasses popular text-guided image editing approaches on both synthesized and real images.
Submitted: 15 November 2024; originally announced November 2024.
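The observation that color can be steered by aligning value matrices in cross-attention can be pictured with a toy attention call: keep the attention map computed from the source prompt, but take the value vector of the edited (color) token from the new prompt. The shapes, the token index, and the bare NumPy softmax below are illustrative assumptions; this is not the ColorEdit implementation.

    import numpy as np

    # Toy cross-attention step: reuse the source prompt's attention map but swap in
    # the edited prompt's value vector for one target token (e.g., the color word).
    # Shapes, token index, and this bare softmax are assumptions, not ColorEdit itself.
    def softmax(x):
        e = np.exp(x - x.max(axis=-1, keepdims=True))
        return e / e.sum(axis=-1, keepdims=True)

    def cross_attn_value_swap(q, k_src, v_src, v_edit, swap_token):
        attn = softmax(q @ k_src.T / np.sqrt(q.shape[-1]))   # attention map from source prompt
        v = v_src.copy()
        v[swap_token] = v_edit[swap_token]                    # align only the edited token's values
        return attn @ v

    rng = np.random.default_rng(0)
    q, k = rng.normal(size=(16, 8)), rng.normal(size=(4, 8))        # 16 image queries, 4 text tokens
    v_src, v_edit = rng.normal(size=(4, 8)), rng.normal(size=(4, 8))
    print(cross_attn_value_swap(q, k, v_src, v_edit, swap_token=2).shape)   # (16, 8)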
arXiv:2411.09981 (https://arxiv.org/abs/2411.09981)
Subjects: Distributed, Parallel, and Cluster Computing (cs.DC)
Title: SoK: Consensus for Fair Message Ordering
Authors: Zhuolun Li, Evangelos Pournaras
Abstract: Distributed ledger systems, such as blockchains, rely on consensus protocols that constantly commit messages in an agreed order for processing. In practice, message ordering within these systems is often reward-driven. This raises concerns about fairness, particularly in decentralized finance applications, where nodes can exploit transaction orders to maximize rewards (Maximal Extractable Value, MEV). This paper provides a structured review of consensus protocols that order messages with different approaches, especially focusing on the ones that promote order fairness, using methods including First-In-First-Out (FIFO), random, and blind ordering. We review the challenges and trade-offs of deriving fair message ordering in a Byzantine fault-tolerant setting, and summarize the key steps for making a fair message ordering consensus protocol. We introduce a design guideline, with which we propose a performance optimization to the state-of-the-art FIFO ordering protocol Themis. This work establishes a unified framework for assessing and enhancing fairness in distributed ledger systems.
Submitted: 15 November 2024; originally announced November 2024.
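A building block behind FIFO-style order fairness can be shown in a few lines: each node reports the order in which it received transactions, and the final order ranks transactions by the median of their local positions, so that a small coalition cannot unilaterally push one transaction ahead. The sketch below ignores Byzantine reporters, batching, and the dependency-graph machinery of protocols like Themis; it only illustrates the aggregation idea.

    from statistics import median

    # Simplified FIFO-style fair ordering: rank each transaction by the median of the
    # positions at which the nodes received it. Real protocols (e.g., Themis) must
    # also tolerate Byzantine nodes and resolve ordering cycles; that is omitted here.
    def fair_order(local_orders):
        txs = set().union(*map(set, local_orders))
        rank = {tx: median(order.index(tx) for order in local_orders) for tx in txs}
        return sorted(txs, key=lambda tx: (rank[tx], tx))

    node_views = [
        ["a", "b", "c", "d"],
        ["b", "a", "c", "d"],
        ["a", "c", "b", "d"],
    ]
    print(fair_order(node_views))   # ['a', 'b', 'c', 'd']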
arXiv:2411.09863 (https://arxiv.org/abs/2411.09863)
Subjects: Computer Vision and Pattern Recognition (cs.CV); Cryptography and Security (cs.CR)
Title: Face De-identification: State-of-the-art Methods and Comparative Studies
Authors: Jingyi Cao, Xiangyi Chen, Bo Liu, Ming Ding, Rong Xie, Li Song, Zhu Li, Wenjun Zhang
Abstract: The widespread use of image acquisition technologies, along with advances in facial recognition, has raised serious privacy concerns. Face de-identification usually refers to the process of concealing or replacing personal identifiers, which is regarded as an effective means to protect the privacy of facial images. A significant number of methods for face de-identification have been proposed in recent years. In this survey, we provide a comprehensive review of state-of-the-art face de-identification methods, categorized into three levels: pixel-level, representation-level, and semantic-level techniques. We systematically evaluate these methods based on two key criteria, the effectiveness of privacy protection and preservation of image utility, highlighting their advantages and limitations. Our analysis includes qualitative and quantitative comparisons of the main algorithms, demonstrating that deep learning-based approaches, particularly those using Generative Adversarial Networks (GANs) and diffusion models, have achieved significant advancements in balancing privacy and utility. Experimental results reveal that while recent methods demonstrate strong privacy protection, trade-offs remain in visual fidelity and computational complexity.
This survey not only summarizes the current landscape but also identifies key challenges and future research directions in face de-identification.
Submitted: 14 November 2024; originally announced November 2024.

arXiv:2411.09760 (https://arxiv.org/abs/2411.09760)
Subjects: Hardware Architecture (cs.AR); Emerging Technologies (cs.ET); Signal Processing (eess.SP)
Title: SpecPCM: A Low-power PCM-based In-Memory Computing Accelerator for Full-stack Mass Spectrometry Analysis
Authors: Keming Fan, Ashkan Moradifirouzabadi, Xiangjin Wu, Zheyu Li, Flavio Ponzina, Anton Persson, Eric Pop, Tajana Rosing, Mingu Kang
Abstract: Mass spectrometry (MS) is essential for proteomics and metabolomics but faces impending challenges in efficiently processing the vast volumes of data. This paper introduces SpecPCM, an in-memory computing (IMC) accelerator designed to achieve substantial improvements in energy and delay efficiency for both MS spectral clustering and database (DB) search.
SpecPCM employs analog processing with low-voltage swing and utilizes recently introduced phase change memory (PCM) devices based on superlattice materials, optimized for low-voltage and low-power programming. Our approach integrates contributions across multiple levels: application, algorithm, circuit, device, and instruction sets. We leverage a robust hyperdimensional computing (HD) algorithm with a novel dimension-packing method and develop specialized hardware for the end-to-end MS pipeline to overcome the non-ideal behavior of PCM devices. We further optimize multi-level PCM devices for different tasks by using different materials. We also perform a comprehensive design exploration to improve energy and delay efficiency while maintaining accuracy, exploring various combinations of hardware and software parameters controlled by the instruction set architecture (ISA). SpecPCM, with up to three bits per cell, achieves speedups of up to 82x and 143x for MS clustering and DB search tasks, respectively, along with a four-orders-of-magnitude improvement in energy efficiency compared with state-of-the-art CPU/GPU tools.
Submitted: 14 November 2024; originally announced November 2024.
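The hyperdimensional computing step that such accelerators build on can be sketched in software: each spectrum becomes one high-dimensional bipolar vector by binding a random per-bin identity vector with a quantized-intensity level vector and bundling over bins, after which clustering or database search reduces to cheap similarity comparisons. The dimension, bin count, and quantization below are assumptions, and the paper's dimension-packing method and PCM device mapping are not modeled.

    import numpy as np

    # Generic hyperdimensional (HD) encoding of a binned spectrum: bind a random
    # "bin identity" hypervector with a quantized "intensity level" hypervector and
    # bundle over bins. Parameters are illustrative; SpecPCM's dimension packing and
    # PCM mapping are not represented here.
    D, BINS, LEVELS = 4096, 64, 8
    rng = np.random.default_rng(1)
    bin_hv = rng.choice([-1, 1], size=(BINS, D))      # one random hypervector per m/z bin
    level_hv = rng.choice([-1, 1], size=(LEVELS, D))  # one per quantized intensity level

    def encode(spectrum):
        levels = np.minimum((spectrum * LEVELS).astype(int), LEVELS - 1)
        return np.sign((bin_hv * level_hv[levels]).sum(axis=0))   # bind, then bundle

    def similarity(a, b):
        return float(a @ b) / D

    s1 = rng.random(BINS)
    s2 = np.clip(s1 + 0.05 * rng.random(BINS), 0, 1)   # slightly perturbed copy
    s3 = rng.random(BINS)                               # unrelated spectrum
    print(similarity(encode(s1), encode(s2)), similarity(encode(s1), encode(s3)))
    # the perturbed copy scores far higher than the unrelated spectrum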
arXiv:2411.09691 (https://arxiv.org/abs/2411.09691)
Subjects: Computer Vision and Pattern Recognition (cs.CV)
Title: Advancing Fine-Grained Visual Understanding with Multi-Scale Alignment in Multi-Modal Models
Authors: Wei Wang, Zhaowei Li, Qi Xu, Linfeng Li, YiQing Cai, Botian Jiang, Hang Song, Xingcan Hu, Pengyu Wang, Li Xiao
Abstract: Multi-modal large language models (MLLMs) have achieved remarkable success in fine-grained visual understanding across a range of tasks. However, they often encounter significant challenges due to inadequate alignment for fine-grained knowledge, which restricts their ability to accurately capture local details and attain a comprehensive global perception. While recent advancements have focused on aligning object expressions with grounding information, they typically lack explicit integration of object images, which contain rich information beyond mere texts or coordinates. To bridge this gap, we introduce a novel fine-grained visual knowledge alignment method that effectively aligns and integrates multi-scale knowledge of objects, including texts, coordinates, and images. This innovative method is underpinned by our multi-scale fine-grained enhancement data synthesis pipeline, which provides over 300K essential training data to enhance alignment and improve overall performance. Furthermore, we present TinyGroundingGPT, a series of compact models optimized for high-level alignments. With a scale of approximately 3B parameters, TinyGroundingGPT achieves outstanding results in grounding tasks while delivering performance comparable to larger MLLMs in complex visual scenarios.
Submitted: 14 November 2024; originally announced November 2024.
arXiv:2411.09301 (https://arxiv.org/abs/2411.09301)
Subjects: Computer Vision and Pattern Recognition (cs.CV)
Title: LHRS-Bot-Nova: Improved Multimodal Large Language Model for Remote Sensing Vision-Language Interpretation
Authors: Zhenshi Li, Dilxat Muhtar, Feng Gu, Xueliang Zhang, Pengfeng Xiao, Guangjun He, Xiaoxiang Zhu
Abstract: Automatically and rapidly understanding Earth's surface is fundamental to our grasp of the living environment and informed decision-making. This underscores the need for a unified system with comprehensive capabilities in analyzing Earth's surface to address a wide range of human needs. The emergence of multimodal large language models (MLLMs) has great potential in boosting the efficiency and convenience of intelligent Earth observation. These models can engage in human-like conversations, serve as unified platforms for understanding images, follow diverse instructions, and provide insightful feedback. In this study, we introduce LHRS-Bot-Nova, an MLLM specialized in understanding remote sensing (RS) images, designed to expertly perform a wide range of RS understanding tasks aligned with human instructions. LHRS-Bot-Nova features an enhanced vision encoder and a novel bridge layer, enabling efficient visual compression and better language-vision alignment. To further enhance RS-oriented vision-language alignment, we propose a large-scale RS image-caption dataset, generated through feature-guided image recaptioning. Additionally, we introduce an instruction dataset specifically designed to improve spatial recognition abilities. Extensive experiments demonstrate superior performance of LHRS-Bot-Nova across various RS image understanding tasks.
We also evaluate the performance of different MLLMs on complex RS perception and instruction following using a complicated multiple-choice question evaluation benchmark, providing a reliable guide for future model selection and improvement. Data, code, and models will be available at https://github.com/NJU-LHRS/LHRS-Bot.
Submitted: 14 November 2024; originally announced November 2024.

arXiv:2411.09259 (https://arxiv.org/abs/2411.09259)
Subjects: Computer Vision and Pattern Recognition (cs.CV); Computation and Language (cs.CL)
Title: Jailbreak Attacks and Defenses against Multimodal Generative Models: A Survey
Authors: Xuannan Liu, Xing Cui, Peipei Li, Zekun Li, Huaibo Huang, Shuhan Xia, Miaoxuan Zhang, Yueying Zou, Ran He
Abstract: The rapid evolution of multimodal foundation models has led to significant advancements in cross-modal understanding and generation across diverse modalities, including text, images, audio, and video. However, these models remain susceptible to jailbreak attacks, which can bypass built-in safety mechanisms and induce the production of potentially harmful content.
Consequently, understanding the methods of jailbreak attacks and existing defense mechanisms is essential to ensure the safe deployment of multimodal generative models in real-world scenarios, particularly in security-sensitive applications. To provide comprehensive insight into this topic, this survey reviews jailbreak attacks and defenses in multimodal generative models. First, given the generalized lifecycle of multimodal jailbreak, we systematically explore attacks and corresponding defense strategies across four levels: input, encoder, generator, and output. Based on this analysis, we present a detailed taxonomy of attack methods, defense mechanisms, and evaluation frameworks specific to multimodal generative models. Additionally, we cover a wide range of input-output configurations, including modalities such as Any-to-Text, Any-to-Vision, and Any-to-Any within generative systems. Finally, we highlight current research challenges and propose potential directions for future research. The open-source repository corresponding to this work can be found at https://github.com/liuxuannan/Awesome-Multimodal-Jailbreak.
Submitted: 14 November 2024; originally announced November 2024.
Comments: ongoing work

arXiv:2411.09220 (https://arxiv.org/abs/2411.09220)
Subjects: Audio and Speech Processing (eess.AS); Artificial Intelligence (cs.AI); Signal Processing (eess.SP)
Title: Transferable Adversarial Attacks against ASR
Authors: Xiaoxue Gao, Zexin Li, Yiming Chen, Cong Liu, Haizhou Li
Abstract: Given the extensive research and real-world applications of automatic speech recognition (ASR), ensuring the robustness of ASR models against minor input perturbations becomes a crucial consideration for maintaining their effectiveness in real-time scenarios.
Previous explorations into ASR model robustness have predominantly revolved around evaluating accuracy in white-box settings with full access to ASR models. Nevertheless, full ASR model details are often not available in real-world applications. Therefore, evaluating the robustness of black-box ASR models is essential for a comprehensive understanding of ASR model resilience. In this regard, we thoroughly study the vulnerability of cutting-edge ASR models to practical black-box attacks and propose to employ two advanced time-domain-based transferable attacks alongside our differentiable feature extractor. We also propose a speech-aware gradient optimization approach (SAGO) for ASR, which forces mistranscription with minimal perceptual impact on human listeners, through a voice activity detection rule and a speech-aware gradient-oriented optimizer. Our comprehensive experimental results reveal performance enhancements compared to baseline approaches across five models on two databases.
Submitted: 14 November 2024; originally announced November 2024.
Comments: IEEE SPL
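The speech-aware idea of restricting an adversarial perturbation to voiced regions can be illustrated with a single masked gradient-sign step. The sketch below uses a crude energy-based voice activity detector and a synthetic stand-in gradient; it is an assumption-heavy toy rather than the SAGO optimizer, which would backpropagate through an actual ASR model.

    import numpy as np

    # One masked FGSM-style step: perturb only frames that a crude energy-based VAD
    # marks as speech, so silent regions stay untouched. The "gradient" here is a
    # synthetic stand-in; a real attack would differentiate an ASR loss w.r.t. audio.
    def energy_vad_mask(audio, frame=400, thresh=0.01):
        mask = np.zeros_like(audio)
        for start in range(0, len(audio), frame):
            seg = audio[start:start + frame]
            if np.mean(seg ** 2) > thresh:           # frame energy above threshold => speech
                mask[start:start + len(seg)] = 1.0
        return mask

    def masked_fgsm_step(audio, grad, eps=0.002):
        return audio + eps * energy_vad_mask(audio) * np.sign(grad)

    rng = np.random.default_rng(0)
    audio = np.concatenate([0.001 * rng.normal(size=4000),    # near-silence
                            0.3 * rng.normal(size=4000)])     # "speech"
    grad = rng.normal(size=audio.shape)                        # stand-in for dLoss/daudio
    adv = masked_fgsm_step(audio, grad)
    print(np.abs(adv - audio)[:4000].max(), np.abs(adv - audio)[4000:].max())
    # 0.0 for the silent half, ~0.002 for the voiced half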
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">IEEE SPL</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.09105">arXiv:2411.09105</a> <span> [<a href="https://arxiv.org/pdf/2411.09105">pdf</a>, <a href="https://arxiv.org/format/2411.09105">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> VCBench: A Controllable Benchmark for Symbolic and Abstract Challenges in Video Cognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+C">Chenglin Li</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Q">Qianglong Chen</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zhi Li</a>, <a href="/search/cs?searchtype=author&query=Tao%2C+F">Feng Tao</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yin Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.09105v1-abstract-short" style="display: inline;"> Recent advancements in Large Video-Language Models (LVLMs) have driven the development of benchmarks designed to assess cognitive abilities in video-based tasks. However, most existing benchmarks heavily rely on web-collected videos paired with human annotations or model-generated questions, which limit control over the video content and fall short in evaluating advanced cognitive abilities involv… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09105v1-abstract-full').style.display = 'inline'; document.getElementById('2411.09105v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.09105v1-abstract-full" style="display: none;"> Recent advancements in Large Video-Language Models (LVLMs) have driven the development of benchmarks designed to assess cognitive abilities in video-based tasks. However, most existing benchmarks heavily rely on web-collected videos paired with human annotations or model-generated questions, which limit control over the video content and fall short in evaluating advanced cognitive abilities involving symbolic elements and abstract concepts. To address these limitations, we introduce VCBench, a controllable benchmark to assess LVLMs' cognitive abilities, involving symbolic and abstract concepts at varying difficulty levels. By generating video data with the Python-based engine, VCBench allows for precise control over the video content, creating dynamic, task-oriented videos that feature complex scenes and abstract concepts. Each task pairs with tailored question templates that target specific cognitive challenges, providing a rigorous evaluation test. Our evaluation reveals that even state-of-the-art (SOTA) models, such as Qwen2-VL-72B, struggle with simple video cognition tasks involving abstract concepts, with performance sharply dropping by 19% as video complexity rises. 
These findings reveal the current limitations of LVLMs in advanced cognitive tasks and highlight the critical role of VCBench in driving research toward more robust LVLMs for complex video cognition challenges.
Submitted: 13 November 2024; originally announced November 2024.

arXiv:2411.07685 (https://arxiv.org/abs/2411.07685)
Subjects: Computer Vision and Pattern Recognition (cs.CV); Artificial Intelligence (cs.AI)
Title: Fast Disentangled Slim Tensor Learning for Multi-view Clustering
Authors: Deng Xu, Chao Zhang, Zechao Li, Chunlin Chen, Huaxiong Li
Abstract: Tensor-based multi-view clustering has recently received significant attention due to its exceptional ability to explore cross-view high-order correlations. However, most existing methods still encounter some limitations. (1) Most of them explore the correlations among different affinity matrices, making them unscalable to large-scale data. (2) Although some methods address it by introducing bipartite graphs, they may result in sub-optimal solutions caused by an unstable anchor selection process. (3) They generally ignore the negative impact of latent semantic-unrelated information in each view. To tackle these issues, we propose a new approach termed fast Disentangled Slim Tensor Learning (DSTL) for multi-view clustering.
Instead of focusing on the multi-view graph structures, DSTL directly explores the high-order correlations among multi-view latent semantic representations based on matrix factorization. To alleviate the negative influence of feature redundancy, inspired by robust PCA, DSTL disentangles the latent low-dimensional representation into a semantic-unrelated part and a semantic-related part for each view. Subsequently, two slim tensors are constructed with tensor-based regularization. To further enhance the quality of feature disentanglement, the semantic-related representations are aligned across views through a consensus alignment indicator. Our proposed model is computationally efficient and can be solved effectively. Extensive experiments demonstrate the superiority and efficiency of DSTL over state-of-the-art approaches. The code of DSTL is available at https://github.com/dengxu-nju/DSTL.
Submitted: 12 November 2024; originally announced November 2024.
Comments: 13 pages, 6 figures; to be published in IEEE TMM

arXiv:2411.07506 (https://arxiv.org/abs/2411.07506)
Subjects: Machine Learning (cs.LG); Artificial Intelligence (cs.AI)
Title: FM-TS: Flow Matching for Time Series Generation
Authors: Yang Hu, Xiao Wang, Lirong Wu, Huatian Zhang, Stan Z. Li, Sheng Wang, Tianlong Chen
Abstract: Time series generation has emerged as an essential tool for analyzing temporal data across numerous fields. While diffusion models have recently gained significant attention in generating high-quality time series, they tend to be computationally demanding and reliant on complex stochastic processes.
arXiv:2411.07506 (https://arxiv.org/abs/2411.07506) [pdf, other]
Subjects: cs.LG; cs.AI
Title: FM-TS: Flow Matching for Time Series Generation
Authors: Yang Hu, Xiao Wang, Lirong Wu, Huatian Zhang, Stan Z. Li, Sheng Wang, Tianlong Chen
Abstract: Time series generation has emerged as an essential tool for analyzing temporal data across numerous fields. While diffusion models have recently gained significant attention in generating high-quality time series, they tend to be computationally demanding and reliant on complex stochastic processes. To address these limitations, we introduce FM-TS, a rectified Flow Matching-based framework for Time Series generation, which simplifies the time series generation process by directly optimizing continuous trajectories. This approach avoids the need for iterative sampling or complex noise schedules typically required in diffusion-based models. FM-TS is more efficient in terms of training and inference. Moreover, FM-TS is highly adaptive, supporting both conditional and unconditional time series generation. Notably, through our novel inference design, the model trained in an unconditional setting can seamlessly generalize to conditional tasks without the need for retraining. Extensive benchmarking across both settings demonstrates that FM-TS consistently delivers superior performance compared to existing approaches while being more efficient in terms of training and inference. For instance, in terms of discriminative score, FM-TS achieves 0.005, 0.019, 0.011, 0.005, 0.053, and 0.106 on the Sines, Stocks, ETTh, MuJoCo, Energy, and fMRI unconditional time series datasets, respectively, significantly outperforming the second-best method which achieves 0.006, 0.067, 0.061, 0.008, 0.122, and 0.167 on the same datasets. We have achieved superior performance in solar forecasting and MuJoCo imputation tasks, significantly enhanced by our innovative $t$ power sampling method. The code is available at https://github.com/UNITES-Lab/FMTS.
Submitted: 11 November, 2024; originally announced November 2024.
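For readers unfamiliar with rectified flow matching, the training objective regresses a velocity field toward the straight-line displacement between noise and data. The sketch below shows that generic loss for a batch of time series; the tiny network, shapes, and names are placeholders and not the FM-TS architecture (see the linked repository for the authors' code).

```python
import torch
import torch.nn as nn

class TinyVelocityNet(nn.Module):
    """Placeholder velocity field v_theta(x_t, t); FM-TS uses its own model."""
    def __init__(self, seq_len, dim, hidden=128):
        super().__init__()
        self.seq_len, self.dim = seq_len, dim
        self.net = nn.Sequential(
            nn.Linear(seq_len * dim + 1, hidden), nn.SiLU(),
            nn.Linear(hidden, seq_len * dim),
        )

    def forward(self, x_t, t):
        flat = torch.cat([x_t.flatten(1), t[:, None]], dim=1)
        return self.net(flat).view(-1, self.seq_len, self.dim)

def rectified_flow_loss(model, x1):
    """Regress the velocity toward x1 - x0 along x_t = (1 - t) x0 + t x1."""
    x0 = torch.randn_like(x1)                     # noise endpoint
    t = torch.rand(x1.size(0), device=x1.device)  # uniform time in [0, 1)
    x_t = (1 - t)[:, None, None] * x0 + t[:, None, None] * x1
    target_v = x1 - x0
    return ((model(x_t, t) - target_v) ** 2).mean()

model = TinyVelocityNet(seq_len=24, dim=6)
batch = torch.randn(32, 24, 6)                    # stand-in time-series batch
loss = rectified_flow_loss(model, batch)
loss.backward()
```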
arXiv:2411.07480 (https://arxiv.org/abs/2411.07480) [pdf, other]
Subjects: cs.SE
Title: Discovery of Timeline and Crowd Reaction of Software Vulnerability Disclosures
Authors: Yi Wen Heng, Zeyang Ma, Haoxiang Zhang, Zhenhao Li, Tse-Hsun Chen
Abstract: Reusing third-party libraries increases productivity and saves time and costs for developers. However, the downside is the presence of vulnerabilities in those libraries, which can lead to catastrophic outcomes. For instance, Apache Log4J was found to be vulnerable to remote code execution attacks. A total of more than 35,000 packages were forced to update their Log4J libraries with the latest version. Although several studies have been conducted to predict software vulnerabilities, the prediction does not cover the vulnerabilities found in third-party libraries. Even if the developers are aware of the forthcoming issue, replicating a function similar to the libraries would be time-consuming and labour-intensive. Nevertheless, it is practically reasonable for software developers to update their third-party libraries (and dependencies) whenever the software vendors have released a vulnerability-free version. In this work, our manual study focuses on the real-world practices (crowd reaction) adopted by software vendors and developer communities when a vulnerability is disclosed. We manually investigated 312 CVEs and identified that the primary trend of vulnerability handling is to provide a fix before publishing an announcement. Otherwise, developers wait an average of 10 days for a fix if it is unavailable upon the announcement. Additionally, the crowd reaction is oblivious to the vulnerability severity. In particular, we identified Oracle as the most vibrant community diligent in releasing fixes.
Their software developers also actively participate in the associated vulnerability announcements.
Submitted: 19 November, 2024; v1 submitted 11 November, 2024; originally announced November 2024.

arXiv:2411.07445 (https://arxiv.org/abs/2411.07445) [pdf, other]
Subjects: cs.CV
Title: All-in-one Weather-degraded Image Restoration via Adaptive Degradation-aware Self-prompting Model
Authors: Yuanbo Wen, Tao Gao, Ziqi Li, Jing Zhang, Kaihao Zhang, Ting Chen
Abstract: Existing approaches for all-in-one weather-degraded image restoration suffer from inefficiencies in leveraging degradation-aware priors, resulting in sub-optimal performance in adapting to different weather conditions. To this end, we develop an adaptive degradation-aware self-prompting model (ADSM) for all-in-one weather-degraded image restoration. Specifically, our model employs the contrastive language-image pre-training model (CLIP) to facilitate the training of our proposed latent prompt generators (LPGs), which represent three types of latent prompts to characterize the degradation type, degradation property and image caption. Moreover, we integrate the acquired degradation-aware prompts into the time embedding of the diffusion model to improve degradation perception.
Meanwhile, we employ the latent caption prompt to guide the reverse sampling process using the cross-attention mechanism, thereby guiding accurate image reconstruction. Furthermore, to accelerate the reverse sampling procedure of the diffusion model and address the limitations of frequency perception, we introduce a wavelet-oriented noise estimating network (WNE-Net). Extensive experiments conducted on eight publicly available datasets demonstrate the effectiveness of our proposed approach in both task-specific and all-in-one applications.
Submitted: 11 November, 2024; originally announced November 2024.

arXiv:2411.07381 (https://arxiv.org/abs/2411.07381) [pdf, other]
Subjects: cs.CL
Title: BeeManc at the PLABA Track of TAC-2024: RoBERTa for task 1 -- LLaMA3.1 and GPT-4o for task 2
Authors: Zhidong Ling, Zihao Li, Pablo Romero, Lifeng Han, Goran Nenadic
Abstract: This report is the system description of the BeeManc team for the shared task Plain Language Adaptation of Biomedical Abstracts (PLABA) 2024. It contains two sections corresponding to the two sub-tasks in PLABA 2024. In task one, we applied fine-tuned RoBERTa-Base models to identify and classify the difficult terms, jargon and acronyms in the biomedical abstracts and reported the F1 score. Due to time constraints, we did not finish the replacement task.
In task two, we leveraged LLaMA-3.1-70B-Instruct and GPT-4o with one-shot prompts to complete the abstract adaptation and reported the scores in BLEU, SARI, BERTScore, LENS, and SALSA. In the official evaluation of PLABA-2024 on Tasks 1A and 1B, our much smaller fine-tuned RoBERTa-Base model ranked 3rd and 2nd, respectively, on the two sub-tasks, and 1st on averaged F1 scores across the two tasks among 9 evaluated systems. Our LLaMA-3.1-70B-Instruct model achieved the highest Completeness score for Task 2. We share our fine-tuned models and related resources at https://github.com/HECTA-UoM/PLABA2024.
Submitted: 18 November, 2024; v1 submitted 11 November, 2024; originally announced November 2024.
Comments: ongoing work - system report

arXiv:2411.07135 (https://arxiv.org/abs/2411.07135) [pdf, other]
Subjects: cs.CV; cs.AI; cs.GR
Title: Edify 3D: Scalable High-Quality 3D Asset Generation
Authors: NVIDIA: Maciej Bala, Yin Cui, Yifan Ding, Yunhao Ge, Zekun Hao, Jon Hasselgren, Jacob Huffman, Jingyi Jin,
J. P. Lewis, Zhaoshuo Li, Chen-Hsuan Lin, Yen-Chen Lin, Tsung-Yi Lin, Ming-Yu Liu, Alice Luo, Qianli Ma, Jacob Munkberg, Stella Shi, Fangyin Wei, Donglai Xiang, Jiashu Xu, Xiaohui Zeng, Qinsheng Zhang
Abstract: We introduce Edify 3D, an advanced solution designed for high-quality 3D asset generation. Our method first synthesizes RGB and surface normal images of the described object at multiple viewpoints using a diffusion model. The multi-view observations are then used to reconstruct the shape, texture, and PBR materials of the object. Our method can generate high-quality 3D assets with detailed geometry, clean shape topologies, high-resolution textures, and materials within 2 minutes of runtime.
Submitted: 11 November, 2024; originally announced November 2024.
Comments: Project website: https://research.nvidia.com/labs/dir/edify-3d

arXiv:2411.06976 (https://arxiv.org/abs/2411.06976) [pdf, other]
Subjects: cs.CV; cs.MM
Title: A Hierarchical Compression Technique for 3D Gaussian Splatting Compression
Authors: He Huang, Wenjie Huang, Qi Yang, Yiling Xu, Zhu li
Abstract: 3D Gaussian Splatting (GS) demonstrates excellent rendering quality and generation speed in novel view synthesis. However, substantial data size poses challenges for storage and transmission, making 3D GS compression an essential technology. Current 3D GS compression research primarily focuses on developing more compact scene representations, such as converting explicit 3D GS data into implicit forms. In contrast, compression of the GS data itself has hardly been explored. To address this gap, we propose a Hierarchical GS Compression (HGSC) technique. Initially, we prune unimportant Gaussians based on importance scores derived from both global and local significance, effectively reducing redundancy while maintaining visual quality. An Octree structure is used to compress 3D positions. Based on the 3D GS Octree, we implement a hierarchical attribute compression strategy by employing a KD-tree to partition the 3D GS into multiple blocks. We apply farthest point sampling to select anchor primitives within each block and treat the others as non-anchor primitives with varying Levels of Detail (LoDs). Anchor primitives serve as reference points for predicting non-anchor primitives across different LoDs to reduce spatial redundancy.
For anchor primitives, we use the region adaptive hierarchical transform to achieve near-lossless compression of various attributes. For non-anchor primitives, each is predicted based on the k-nearest anchor primitives. To further minimize prediction errors, the reconstructed LoD and anchor primitives are combined to form new anchor primitives to predict the next LoD. Our method notably achieves superior compression quality and a significant data size reduction of over 4.5 times compared to the state-of-the-art compression method on small-scene datasets.
Submitted: 11 November, 2024; originally announced November 2024.
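The anchor selection step mentioned above relies on farthest point sampling. A minimal, generic sketch of that sampling routine is shown below; it is not the HGSC codebase, and the block size, anchor count, and array names are illustrative assumptions.

```python
import numpy as np

def farthest_point_sampling(points, n_anchors, seed=0):
    """Greedy farthest point sampling: each new anchor is the point whose
    distance to the already chosen anchors is largest."""
    rng = np.random.default_rng(seed)
    chosen = [int(rng.integers(points.shape[0]))]
    dist = np.linalg.norm(points - points[chosen[0]], axis=1)
    for _ in range(n_anchors - 1):
        nxt = int(np.argmax(dist))
        chosen.append(nxt)
        dist = np.minimum(dist, np.linalg.norm(points - points[nxt], axis=1))
    return np.array(chosen)

# Usage: split one KD-tree block of Gaussian centers into anchors and non-anchors.
centers = np.random.rand(5000, 3)                 # stand-in 3D positions
anchor_idx = farthest_point_sampling(centers, n_anchors=256)
non_anchor_mask = np.ones(len(centers), dtype=bool)
non_anchor_mask[anchor_idx] = False
print(anchor_idx.shape, int(non_anchor_mask.sum()))
```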
arXiv:2411.06893 (https://arxiv.org/abs/2411.06893) [pdf, other]
Subjects: cs.CV
Title: Multi-scale Frequency Enhancement Network for Blind Image Deblurring
Authors: Yawen Xiang, Heng Zhou, Chengyang Li, Zhongbo Li, Yongqiang Xie
Abstract: Image deblurring is an essential image preprocessing technique, aiming to recover clear and detailed images from blurry ones. However, existing algorithms often fail to effectively integrate multi-scale feature extraction with frequency enhancement, limiting their ability to reconstruct fine textures. Additionally, non-uniform blur in images also restricts the effectiveness of image restoration. To address these issues, we propose a multi-scale frequency enhancement network (MFENet) for blind image deblurring. To capture the multi-scale spatial and channel information of blurred images, we introduce a multi-scale feature extraction module (MS-FE) based on depthwise separable convolutions, which provides rich target features for deblurring. We propose a frequency enhanced blur perception module (FEBP) that employs wavelet transforms to extract high-frequency details and utilizes multi-strip pooling to perceive non-uniform blur, combining multi-scale information with frequency enhancement to improve the restoration of image texture details. Experimental results on the GoPro and HIDE datasets demonstrate that the proposed method achieves superior deblurring performance in both visual quality and objective evaluation metrics. Furthermore, in downstream object detection tasks, the proposed blind image deblurring algorithm significantly improves detection accuracy, further validating its effectiveness and robustness in the field of image deblurring.
Submitted: 11 November, 2024; originally announced November 2024.
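The FEBP module is described as extracting high-frequency details with wavelet transforms. As a point of reference only, the snippet below shows a single-level 2D discrete wavelet transform that isolates the detail sub-bands; it assumes the PyWavelets package is installed and is not the authors' module.

```python
import numpy as np
import pywt

def high_frequency_bands(image, wavelet="haar"):
    """Single-level 2D DWT; return the three detail sub-bands
    (horizontal, vertical, diagonal) that carry edge and texture information."""
    _, (lh, hl, hh) = pywt.dwt2(image, wavelet)
    return lh, hl, hh

img = np.random.rand(256, 256).astype(np.float32)   # stand-in grayscale image
lh, hl, hh = high_frequency_bands(img)
print(lh.shape, hl.shape, hh.shape)                  # each half-resolution for 'haar'
```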
arXiv:2411.06723 (https://arxiv.org/abs/2411.06723) [pdf, other]
Subjects: cs.HC; cs.AI
Title: Script-Strategy Aligned Generation: Aligning LLMs with Expert-Crafted Dialogue Scripts and Therapeutic Strategies for Psychotherapy
Authors: Xin Sun, Jan de Wit, Zhuying Li, Jiahuan Pei, Abdallah El Ali, Jos A. Bosch
Abstract: Chatbots or conversational agents (CAs) are increasingly used to improve access to digital psychotherapy. Many current systems rely on rigid, rule-based designs, heavily dependent on expert-crafted dialogue scripts for guiding therapeutic conversations. Although recent advances in large language models (LLMs) offer the potential for more flexible interactions, their lack of controllability and transparency poses significant challenges in sensitive areas like psychotherapy. In this work, we explored how aligning LLMs with expert-crafted scripts can enhance psychotherapeutic chatbot performance. Our comparative study showed that LLMs aligned with expert-crafted scripts through prompting and fine-tuning significantly outperformed both pure LLMs and rule-based chatbots, achieving a more effective balance between dialogue flexibility and adherence to therapeutic principles. Building on these findings, we proposed "Script-Strategy Aligned Generation (SSAG)", a flexible alignment approach that reduces reliance on fully scripted content while enhancing LLMs' therapeutic adherence and controllability. In a 10-day field study, SSAG demonstrated performance comparable to full script alignment and outperformed rule-based chatbots, empirically supporting SSAG as an efficient approach for aligning LLMs with domain expertise. Our work advances LLM applications in psychotherapy by providing a controllable, adaptable, and scalable solution for digital interventions, reducing reliance on expert effort. It also provides a collaborative framework for domain experts and developers to efficiently build expertise-aligned chatbots, broadening access to psychotherapy and behavioral interventions.
Submitted: 11 November, 2024; originally announced November 2024.
arXiv:2411.06685 (https://arxiv.org/abs/2411.06685) [pdf, other]
Subjects: cs.CV; cs.AI; eess.IV
Title: High-Frequency Enhanced Hybrid Neural Representation for Video Compression
Authors: Li Yu, Zhihui Li, Jimin Xiao, Moncef Gabbouj
Abstract: Neural Representations for Videos (NeRV) have simplified the video codec process and achieved swift decoding speeds by encoding video content into a neural network, presenting a promising solution for video compression. However, existing work overlooks the crucial issue that videos reconstructed by these methods lack high-frequency details. To address this problem, this paper introduces a High-Frequency Enhanced Hybrid Neural Representation Network. Our method focuses on leveraging high-frequency information to improve the synthesis of fine details by the network. Specifically, we design a wavelet high-frequency encoder that incorporates Wavelet Frequency Decomposer (WFD) blocks to generate high-frequency feature embeddings. Next, we design the High-Frequency Feature Modulation (HFM) block, which leverages the extracted high-frequency embeddings to enhance the fitting process of the decoder. Finally, with the refined Harmonic decoder block and a Dynamic Weighted Frequency Loss, we further reduce the potential loss of high-frequency information. Experiments on the Bunny and UVG datasets demonstrate that our method outperforms other methods, showing notable improvements in detail preservation and compression performance.
Submitted: 10 November, 2024; originally announced November 2024.

arXiv:2411.06518 (https://arxiv.org/abs/2411.06518) [pdf, other]
Subjects: cs.LG; q-bio.QM; stat.ME
Title: Causal Representation Learning from Multimodal Biological Observations
Authors: Yuewen Sun, Lingjing Kong, Guangyi Chen, Loka Li, Gongxu Luo, Zijian Li, Yixuan Zhang, Yujia Zheng, Mengyue Yang, Petar Stojanov, Eran Segal, Eric P. Xing, Kun Zhang
Abstract: Prevalent in biological applications (e.g., human phenotype measurements), multimodal datasets can provide valuable insights into the underlying biological mechanisms. However, current machine learning models designed to analyze such datasets still lack interpretability and theoretical guarantees, which are essential to biological applications.
Recent advances in causal representation learning have shown promise in uncovering the interpretable latent causal variables with formal theoretical certificates. Unfortunately, existing works for multimodal distributions either rely on restrictive parametric assumptions or provide rather coarse identification results, limiting their applicability to biological research, which favors a detailed understanding of the mechanisms. In this work, we aim to develop flexible identification conditions for multimodal data and principled methods to facilitate the understanding of biological datasets. Theoretically, we consider a flexible nonparametric latent distribution (cf. parametric assumptions in prior work) permitting causal relationships across potentially different modalities. We establish identifiability guarantees for each latent component, extending the subspace identification results from prior work. Our key theoretical ingredient is the structural sparsity of the causal connections among distinct modalities, which, as we will discuss, is natural for a large collection of biological systems. Empirically, we propose a practical framework to instantiate our theoretical insights. We demonstrate the effectiveness of our approach through extensive experiments on both numerical and synthetic datasets. Results on a real-world human phenotype dataset are consistent with established medical research, validating our theoretical and methodological framework.
Submitted: 10 November, 2024; originally announced November 2024.
arXiv:2411.06363 (https://arxiv.org/abs/2411.06363) [pdf, other]
Subjects: cs.CV; cs.AI
Title: Layer-Wise Feature Metric of Semantic-Pixel Matching for Few-Shot Learning
Authors: Hao Tang, Junhao Lu, Guoheng Huang, Ming Li, Xuhang Chen, Guo Zhong, Zhengguang Tan, Zinuo Li
Abstract: In Few-Shot Learning (FSL), traditional metric-based approaches often rely on global metrics to compute similarity. However, in natural scenes, the spatial arrangement of key instances is often inconsistent across images. This spatial misalignment can result in mismatched semantic pixels, leading to inaccurate similarity measurements. To address this issue, we propose a novel method called the Layer-Wise Features Metric of Semantic-Pixel Matching (LWFM-SPM) to make finer comparisons. Our method enhances model performance through two key modules: (1) the Layer-Wise Embedding (LWE) Module, which refines the cross-correlation of image pairs to generate well-focused feature maps for each layer; (2) the Semantic-Pixel Matching (SPM) Module, which aligns critical pixels based on semantic embeddings using an assignment algorithm. We conducted extensive experiments to evaluate our method on four widely used few-shot classification benchmarks: miniImageNet, tieredImageNet, CUB-200-2011, and CIFAR-FS. The results indicate that LWFM-SPM achieves competitive performance across these benchmarks. Our code will be publicly available on https://github.com/Halo2Tang/Code-for-LWFM-SPM.
Submitted: 10 November, 2024; originally announced November 2024.
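The SPM module aligns pixels with an assignment algorithm over semantic embeddings. A generic sketch of that step using the Hungarian algorithm from SciPy is given below; the cosine-similarity cost, feature shapes, and function name are illustrative assumptions rather than the paper's implementation.

```python
import numpy as np
from scipy.optimize import linear_sum_assignment

def match_semantic_pixels(feat_a, feat_b):
    """feat_a, feat_b: (num_pixels, dim) embeddings from two images.
    Returns index pairs that maximize total cosine similarity."""
    a = feat_a / np.linalg.norm(feat_a, axis=1, keepdims=True)
    b = feat_b / np.linalg.norm(feat_b, axis=1, keepdims=True)
    sim = a @ b.T                               # cosine similarity matrix
    rows, cols = linear_sum_assignment(-sim)    # negate to maximize similarity
    return rows, cols, float(sim[rows, cols].mean())

fa = np.random.rand(49, 64)   # e.g. a 7x7 feature map flattened per image
fb = np.random.rand(49, 64)
rows, cols, score = match_semantic_pixels(fa, fb)
print(score)
```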
arXiv:2411.06128 (https://arxiv.org/abs/2411.06128) [pdf]
Subjects: cs.RO; cs.AI
Title: Research on reinforcement learning based warehouse robot navigation algorithm in complex warehouse layout
Authors: Keqin Li, Lipeng Liu, Jiajing Chen, Dezhi Yu, Xiaofan Zhou, Ming Li, Congyu Wang, Zhao Li
Abstract: In this paper, how to efficiently find the optimal path in a complex warehouse layout and make real-time decisions is a key problem. This paper proposes a new method combining Proximal Policy Optimization (PPO) and Dijkstra's algorithm, Proximal Policy-Dijkstra (PP-D). The PP-D method realizes efficient strategy learning and real-time decision making through PPO, and uses Dijkstra's algorithm to plan the globally optimal path, thus ensuring high navigation accuracy and significantly improving the efficiency of path planning. Specifically, PPO enables robots to quickly adapt and optimize action strategies in dynamic environments through its stable policy updating mechanism. Dijkstra's algorithm ensures globally optimal path planning in static environments. Finally, through comparison experiments and analysis of the proposed framework against traditional algorithms, the results show that the PP-D method has significant advantages in improving the accuracy of navigation prediction and enhancing the robustness of the system. Especially in complex warehouse layouts, the PP-D method can find the optimal path more accurately and reduce collisions and stagnation. This demonstrates the reliability and effectiveness of the method for robot navigation in complex warehouse layouts.
Submitted: 9 November, 2024; originally announced November 2024.
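The static global-planning half of PP-D is standard Dijkstra search. The sketch below runs Dijkstra on a small 4-connected occupancy grid; the grid, start, and goal are toy values, and the PPO component for dynamic adaptation is omitted entirely.

```python
import heapq

def dijkstra_grid(grid, start, goal):
    """Shortest path on a 4-connected grid; cells equal to 1 are obstacles.
    Returns the list of cells from start to goal, or None if unreachable."""
    rows, cols = len(grid), len(grid[0])
    dist, prev = {start: 0}, {}
    heap = [(0, start)]
    while heap:
        d, cell = heapq.heappop(heap)
        if cell == goal:
            path = [cell]
            while cell in prev:
                cell = prev[cell]
                path.append(cell)
            return path[::-1]
        if d > dist.get(cell, float("inf")):
            continue                       # stale heap entry
        r, c = cell
        for nr, nc in ((r + 1, c), (r - 1, c), (r, c + 1), (r, c - 1)):
            if 0 <= nr < rows and 0 <= nc < cols and grid[nr][nc] == 0:
                nd = d + 1
                if nd < dist.get((nr, nc), float("inf")):
                    dist[(nr, nc)] = nd
                    prev[(nr, nc)] = cell
                    heapq.heappush(heap, (nd, (nr, nc)))
    return None

warehouse = [[0, 0, 0, 1],
             [1, 1, 0, 1],
             [0, 0, 0, 0]]
print(dijkstra_grid(warehouse, (0, 0), (2, 3)))
```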
arXiv:2411.06046 (https://arxiv.org/abs/2411.06046) [pdf]
Subjects: cs.AI
Title: Personalized News Recommendation System via LLM Embedding and Co-Occurrence Patterns
Authors: Zheng Li, Kai Zhange
Abstract: In the past two years, large language models (LLMs) have achieved rapid development and demonstrated remarkable emerging capabilities. Concurrently, with powerful semantic understanding and reasoning capabilities, LLMs have significantly empowered the rapid advancement of the recommendation system field. Specifically, in news recommendation (NR), systems must comprehend and process a vast amount of clicked news text to infer the probability of candidate news clicks. This requirement exceeds the capabilities of traditional NR models but aligns well with the strengths of LLMs. In this paper, we propose a novel NR algorithm to reshape the news model via LLM Embedding and Co-Occurrence Pattern (LECOP). On one hand, we fine-tuned an LLM by contrastive learning using large-scale datasets to encode news, which can fully explore the semantic information of news to thoroughly identify user preferences. On the other hand, we explored multiple co-occurrence patterns to mine collaborative information. Those patterns include news ID co-occurrence, item-item keyword co-occurrence and intra-item keyword co-occurrence. The keywords mentioned above are all generated by the LLM. To the best of our knowledge, this is the first work to construct such detailed co-occurrence patterns via an LLM to capture collaboration. Extensive experiments demonstrate the superior performance of our proposed novel method.
Submitted: 8 November, 2024; originally announced November 2024.
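Of the patterns listed, news ID co-occurrence is the simplest to picture: count how often two items are clicked by the same user. A minimal counting sketch follows; the data layout and function name are illustrative and unrelated to the paper's code.

```python
from collections import Counter
from itertools import combinations

def news_id_cooccurrence(click_histories):
    """Count how often two news IDs appear in the same user's click history.
    click_histories: iterable of per-user lists of clicked news IDs."""
    counts = Counter()
    for history in click_histories:
        for a, b in combinations(sorted(set(history)), 2):
            counts[(a, b)] += 1
    return counts

histories = [["n1", "n2", "n3"], ["n2", "n3"], ["n1", "n3"]]
cooc = news_id_cooccurrence(histories)
print(cooc.most_common(3))   # most frequently co-clicked pairs first
```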
arXiv:2411.05881 (https://arxiv.org/abs/2411.05881) [pdf, other]
Subjects: cs.RO
Title: MIPD: A Multi-sensory Interactive Perception Dataset for Embodied Intelligent Driving
Authors: Zhiwei Li, Tingzhen Zhang, Meihua Zhou, Dandan Tang, Pengwei Zhang, Wenzhuo Liu, Qiaoning Yang, Tianyu Shen, Kunfeng Wang, Huaping Liu
Abstract: During the process of driving, humans usually rely on multiple senses to gather information and make decisions. Analogously, in order to achieve embodied intelligence in autonomous driving, it is essential to integrate multidimensional sensory information in order to facilitate interaction with the environment. However, the current multi-modal fusion sensing schemes often neglect these additional sensory inputs, hindering the realization of fully autonomous driving. This paper considers multi-sensory information and proposes a multi-modal interactive perception dataset named MIPD, enabling the expansion of current autonomous driving algorithm frameworks and supporting research on embodied intelligent driving. In addition to the conventional camera, lidar, and 4D radar data, our dataset incorporates multiple sensor inputs including sound, light intensity, vibration intensity and vehicle speed to enrich the dataset comprehensiveness. Comprising 126 consecutive sequences, many exceeding twenty seconds, MIPD features over 8,500 meticulously synchronized and annotated frames. Moreover, it encompasses many challenging scenarios, covering various road and lighting conditions. The dataset has undergone thorough experimental validation, producing valuable insights for the exploration of next-generation autonomous driving frameworks.
Submitted: 8 November, 2024; originally announced November 2024.
Comments: Data, development kit and more details will be available at https://github.com/BUCT-IUSRC/Dataset MIPD

arXiv:2411.05844 (https://arxiv.org/abs/2411.05844) [pdf, other]
Subjects: cs.AI; cs.CL
Title: LEGO-GraphRAG: Modularizing Graph-based Retrieval-Augmented Generation for Design Space Exploration
Authors: Yukun Cao, Zengyi Gao, Zhiyang Li, Xike Xie, S Kevin Zhou
Abstract: GraphRAG addresses significant challenges in Retrieval-Augmented Generation (RAG) by leveraging graphs with embedded knowledge to enhance the reasoning capabilities of Large Language Models (LLMs). Despite its promising potential, the GraphRAG community currently lacks a unified framework for fine-grained decomposition of the graph-based knowledge retrieval process. Furthermore, there is no systematic categorization or evaluation of existing solutions within the retrieval process. In this paper, we present LEGO-GraphRAG, a modular framework that decomposes the retrieval process of GraphRAG into three interconnected modules: subgraph-extraction, path-filtering, and path-refinement. We systematically summarize and classify the algorithms and neural network (NN) models relevant to each module, providing a clearer understanding of the design space for GraphRAG instances. Additionally, we identify key design factors, such as Graph Coupling and Computational Cost, that influence the effectiveness of GraphRAG implementations.
Through extensive empirical studies, we construct high-quality GraphRAG instances using a representative selection of solutions and analyze their impact on retrieval and reasoning performance. Our findings offer critical insights into optimizing GraphRAG instance design, ultimately contributing to the advancement of more accurate and contextually relevant LLM applications.
Submitted 6 November, 2024; originally announced November 2024.
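As a hedged illustration of the modular decomposition described above: the three module names come from the abstract, while the function signatures, data structures, and toy logic below are assumptions, not LEGO-GraphRAG's actual interfaces.

```python
# Minimal sketch of a three-stage GraphRAG-style retrieval pipeline:
# subgraph-extraction -> path-filtering -> path-refinement.
from typing import Dict, List, Tuple

Graph = Dict[str, List[str]]          # adjacency list: node -> neighbors
Path = Tuple[str, ...]

def subgraph_extraction(graph: Graph, seeds: List[str], hops: int = 2) -> Graph:
    """Keep only nodes reachable from the query seeds within `hops` steps."""
    frontier, kept = set(seeds), set(seeds)
    for _ in range(hops):
        frontier = {n for u in frontier for n in graph.get(u, [])} - kept
        kept |= frontier
    return {u: [v for v in vs if v in kept] for u, vs in graph.items() if u in kept}

def path_filtering(sub: Graph, seeds: List[str], max_len: int = 3) -> List[Path]:
    """Enumerate short paths starting from the seeds (coarse candidate set)."""
    paths: List[Path] = []
    def walk(path: Path) -> None:
        paths.append(path)
        if len(path) < max_len:
            for nxt in sub.get(path[-1], []):
                if nxt not in path:
                    walk(path + (nxt,))
    for s in seeds:
        walk((s,))
    return paths

def path_refinement(paths: List[Path], query: str) -> List[Path]:
    """Re-rank candidate paths; a real instance might plug in an NN scorer."""
    def score(p: Path) -> int:
        joined = " ".join(p).lower()
        return sum(tok in joined for tok in query.lower().split())
    return sorted(paths, key=score, reverse=True)[:5]

# Example usage on a toy knowledge graph:
g = {"li": ["paper_a", "paper_b"], "paper_a": ["graphrag"], "paper_b": ["rag"]}
print(path_refinement(path_filtering(subgraph_extraction(g, ["li"]), ["li"]), "graphrag"))
```

The point of such a decomposition is that each stage can be swapped independently (rule-based vs. neural), which is exactly the design space the paper sets out to explore.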

arXiv:2411.05825 (https://arxiv.org/abs/2411.05825) [pdf, other]
Subjects: q-bio.NC (Neurons and Cognition); cs.AI (Artificial Intelligence); cs.CV (Computer Vision and Pattern Recognition)
Title: SurfGNN: A robust surface-based prediction model with interpretability for coactivation maps of spatial and cortical features
Authors: Zhuoshuo Li, Jiong Zhang, Youbing Zeng, Jiaying Lin, Dan Zhang, Jianjia Zhang, Duan Xu, Hosung Kim, Bingguang Liu, Mengting Liu
Abstract: Current brain surface-based prediction models often overlook the variability of regional attributes at the cortical feature level. While graph neural networks (GNNs) excel at capturing regional differences, they encounter challenges when dealing with complex, high-density graph structures. In this work, we consider the cortical surface mesh as a sparse graph and propose an interpretable prediction model, the Surface Graph Neural Network (SurfGNN). SurfGNN employs topology-sampling learning (TSL) and region-specific learning (RSL) structures to manage individual cortical features at both lower and higher scales of the surface mesh, effectively tackling the challenges posed by the overly abundant mesh nodes and addressing the issue of heterogeneity in cortical regions. Building on this, a novel score-weighted fusion (SWF) method is implemented to merge nodal representations associated with each cortical feature for prediction. We apply our model to a neonatal brain age prediction task using a dataset of harmonized MR images from 481 subjects (503 scans). SurfGNN outperforms all existing state-of-the-art methods, demonstrating an improvement of at least 9.0% and achieving a mean absolute error (MAE) of 0.827±0.056 in postmenstrual weeks. Furthermore, it generates feature-level activation maps, indicating its capability to identify robust regional variations in different morphometric contributions for prediction.
Submitted 5 November, 2024; originally announced November 2024.
Comments: 15 pages, 6 figures
ACM Class: J.3
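As a hedged aside, one plausible reading of a score-weighted fusion step such as the SWF described above is a learned softmax gate over per-feature embeddings; the shapes and layers below are illustrative assumptions, not SurfGNN's actual architecture.

```python
# Toy score-weighted fusion: merge one pooled embedding per cortical feature
# using learned scalar scores. Shapes and layers are illustrative assumptions.
import torch
import torch.nn as nn

class ScoreWeightedFusion(nn.Module):
    """Merge one embedding per cortical feature via learned scores."""
    def __init__(self, dim: int):
        super().__init__()
        self.scorer = nn.Linear(dim, 1)   # scalar score per feature embedding
        self.head = nn.Linear(dim, 1)     # regression head (e.g., brain age)

    def forward(self, feats: torch.Tensor) -> torch.Tensor:
        # feats: (batch, num_features, dim), one row per cortical feature
        # (e.g., thickness, curvature, sulcal depth, ...).
        weights = torch.softmax(self.scorer(feats), dim=1)   # (batch, F, 1)
        fused = (weights * feats).sum(dim=1)                  # (batch, dim)
        return self.head(fused).squeeze(-1)                   # (batch,)

# Example: 4 cortical features, 32-dim embeddings, batch of 2 subjects.
swf = ScoreWeightedFusion(dim=32)
print(swf(torch.randn(2, 4, 32)).shape)   # torch.Size([2])
```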
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">15 pages, 6 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> J.3 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.05348">arXiv:2411.05348</a> <span> [<a href="https://arxiv.org/pdf/2411.05348">pdf</a>, <a href="https://arxiv.org/format/2411.05348">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> LLM-PySC2: Starcraft II learning environment for Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zongyuan Li</a>, <a href="/search/cs?searchtype=author&query=Ni%2C+Y">Yanan Ni</a>, <a href="/search/cs?searchtype=author&query=Qi%2C+R">Runnan Qi</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+L">Lumin Jiang</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+C">Chang Lu</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+X">Xiaojie Xu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xiangbei Liu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+P">Pengfei Li</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+Y">Yunzheng Guo</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+Z">Zhe Ma</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+X">Xian Guo</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+K">Kuihua Huang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xuebo Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.05348v1-abstract-short" style="display: inline;"> This paper introduces a new environment LLM-PySC2 (the Large Language Model StarCraft II Learning Environment), a platform derived from DeepMind's StarCraft II Learning Environment that serves to develop Large Language Models (LLMs) based decision-making methodologies. This environment is the first to offer the complete StarCraft II action space, multi-modal observation interfaces, and a structure… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05348v1-abstract-full').style.display = 'inline'; document.getElementById('2411.05348v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.05348v1-abstract-full" style="display: none;"> This paper introduces a new environment LLM-PySC2 (the Large Language Model StarCraft II Learning Environment), a platform derived from DeepMind's StarCraft II Learning Environment that serves to develop Large Language Models (LLMs) based decision-making methodologies. This environment is the first to offer the complete StarCraft II action space, multi-modal observation interfaces, and a structured game knowledge database, which are seamlessly connected with various LLMs to facilitate the research of LLMs-based decision-making. To further support multi-agent research, we developed an LLM collaborative framework that supports multi-agent concurrent queries and multi-agent communication. 
In our experiments, the LLM-PySC2 environment is adapted to be compatible with the StarCraft Multi-Agent Challenge (SMAC) task group and provides eight new scenarios focused on macro-decision abilities. We evaluated nine mainstream LLMs in the experiments, and the results show that sufficient parameters are necessary for LLMs to make decisions, but improving reasoning ability does not directly lead to better decision-making outcomes. Our findings further indicate the importance of enabling large models to learn autonomously in the deployment environment through parameter training or train-free learning techniques. Ultimately, we expect that the LLM-PySC2 environment can promote research on learning methods for LLMs, helping LLM-based methods better adapt to task scenarios.
Submitted 8 November, 2024; originally announced November 2024.
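As a hedged sketch of the kind of observation-to-action loop such an environment supports; the class and method names below are placeholders, not the actual LLM-PySC2 API.

```python
# Generic observation -> prompt -> LLM -> action loop, with stub components.
# Names are placeholders; this is NOT the LLM-PySC2 interface.
class StubEnv:
    def reset(self): return "minerals=50; idle_workers=1; enemy_seen=False"
    def step(self, action): return f"did:{action}", (action == "train_worker"), False

class StubLLM:
    def generate(self, prompt: str) -> str:
        # A real agent would call a hosted or local LLM here.
        return "train_worker" if "idle_workers=1" in prompt else "scout"

def run_episode(env, llm, max_steps: int = 3) -> float:
    obs, total = env.reset(), 0.0
    for _ in range(max_steps):
        prompt = f"Observation: {obs}\nChoose one action (train_worker/scout):"
        action = llm.generate(prompt).strip()
        obs, reward, done = env.step(action)
        total += reward
        if done:
            break
    return total

print(run_episode(StubEnv(), StubLLM()))
```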

arXiv:2411.05261 (https://arxiv.org/abs/2411.05261) [pdf, other]
Subjects: cs.CV (Computer Vision and Pattern Recognition); cs.AI (Artificial Intelligence); cs.CL (Computation and Language); cs.LG (Machine Learning)
Title: Decoding Report Generators: A Cyclic Vision-Language Adapter for Counterfactual Explanations
Authors: Yingying Fang, Zihao Jin, Shaojie Guo, Jinda Liu, Yijian Gao, Junzhi Ning, Zhiling Yue, Zhi Li, Simon LF Walsh, Guang Yang
Abstract: Despite significant advancements in report generation methods, a critical limitation remains: the lack of interpretability in the generated text. This paper introduces an innovative approach to enhance the explainability of text generated by report generation models. Our method employs cyclic text manipulation and visual comparison to identify and elucidate the features in the original content that influence the generated text. By manipulating the generated reports and producing corresponding images, we create a comparative framework that highlights key attributes and their impact on the text generation process. This approach not only identifies the image features aligned to the generated text but also improves transparency and provides deeper insights into the decision-making mechanisms of the report generation models. Our findings demonstrate the potential of this method to significantly enhance the interpretability and transparency of AI-generated reports.
Submitted 7 November, 2024; originally announced November 2024.

arXiv:2411.05185 (https://arxiv.org/abs/2411.05185) [pdf, other]
Subjects: cs.CR (Cryptography and Security)
Title: PentestAgent: Incorporating LLM Agents to Automated Penetration Testing
Authors: Xiangmin Shen, Lingzhi Wang, Zhenyuan Li, Yan Chen, Wencheng Zhao, Dawei Sun, Jiashui Wang, Wei Ruan
Abstract: Penetration testing is a critical technique for identifying security vulnerabilities, traditionally performed manually by skilled security specialists. This complex process involves gathering information about the target system, identifying entry points, exploiting the system, and reporting findings. Despite its effectiveness, manual penetration testing is time-consuming and expensive, often requiring significant expertise and resources that many organizations cannot afford. While automated penetration testing methods have been proposed, they often fall short in real-world applications due to limitations in flexibility, adaptability, and implementation. Recent advancements in large language models (LLMs) offer new opportunities for enhancing penetration testing through increased intelligence and automation. However, current LLM-based approaches still face significant challenges, including limited penetration testing knowledge and a lack of comprehensive automation capabilities. To address these gaps, we propose PentestAgent, a novel LLM-based automated penetration testing framework that leverages the power of LLMs and various LLM-based techniques like Retrieval Augmented Generation (RAG) to enhance penetration testing knowledge and automate various tasks. Our framework leverages multi-agent collaboration to automate intelligence gathering, vulnerability analysis, and exploitation stages, reducing manual intervention. We evaluate PentestAgent using a comprehensive benchmark, demonstrating superior performance in task completion and overall efficiency. This work significantly advances the practical applicability of automated penetration testing systems.
Submitted 7 November, 2024; originally announced November 2024.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages, 13 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.04898">arXiv:2411.04898</a> <span> [<a href="https://arxiv.org/pdf/2411.04898">pdf</a>, <a href="https://arxiv.org/format/2411.04898">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Strongly Correlated Electrons">cond-mat.str-el</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Complexity">cs.CC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Mathematical Physics">math-ph</span> </div> </div> <p class="title is-5 mathjax"> Convergence efficiency of quantum gates and circuits </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kong%2C+L">Linghang Kong</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zimu Li</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zi-Wen Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.04898v1-abstract-short" style="display: inline;"> We consider quantum circuit models where the gates are drawn from arbitrary gate ensembles given by probabilistic distributions over certain gate sets and circuit architectures, which we call stochastic quantum circuits. Of main interest in this work is the speed of convergence of stochastic circuits with different gate ensembles and circuit architectures to unitary t-designs. A key motivation for… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04898v1-abstract-full').style.display = 'inline'; document.getElementById('2411.04898v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.04898v1-abstract-full" style="display: none;"> We consider quantum circuit models where the gates are drawn from arbitrary gate ensembles given by probabilistic distributions over certain gate sets and circuit architectures, which we call stochastic quantum circuits. Of main interest in this work is the speed of convergence of stochastic circuits with different gate ensembles and circuit architectures to unitary t-designs. A key motivation for this theory is the varying preference for different gates and circuit architectures in different practical scenarios. In particular, it provides a versatile framework for devising efficient circuits for implementing $t$-designs and relevant applications including random circuit and scrambling experiments, as well as benchmarking the performance of gates and circuit architectures. We examine various important settings in depth. A key aspect of our study is an "ironed gadget" model, which allows us to systematically evaluate and compare the convergence efficiency of entangling gates and circuit architectures. 
Particularly notable results include i) gadgets of two-qubit gates with KAK coefficients $\left(\frac{\pi}{4}-\frac{1}{8}\arccos(\frac{1}{5}),\frac{\pi}{8},\frac{1}{8}\arccos(\frac{1}{5})\right)$ (which we call $\chi$ gates) directly form exact 2- and 3-designs; ii) the iSWAP gate family achieves the best efficiency for convergence to 2-designs under mild conjectures with numerical evidence, even outperforming the Haar-random gate, for generic many-body circuits; iii) iSWAP + complete graph achieve the best efficiency for convergence to 2-designs among all graph circuits. A variety of numerical results are provided to complement our analysis. We also derive robustness guarantees for our analysis against gate perturbations. Additionally, we provide cursory analysis on gates with higher locality and found that the Margolus gate outperforms various other well-known gates.
Submitted 7 November, 2024; originally announced November 2024.
Comments: 50 pages + 8 tables + 6 figures
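An explanatory note for context (an editorial addition, not part of the abstract): the "KAK coefficients" of a two-qubit gate are the triple $(c_x, c_y, c_z)$ in its Cartan (KAK) decomposition, which, up to sign and ordering conventions, reads

$$ U = (k_1 \otimes k_2)\,\exp\!\left[\, i\left(c_x\, X\otimes X + c_y\, Y\otimes Y + c_z\, Z\otimes Z\right)\right]\,(k_3 \otimes k_4), $$

where $k_1,\dots,k_4$ are single-qubit unitaries and $X, Y, Z$ are Pauli matrices. The $\chi$ gate above is therefore specified, up to local unitaries, by the particular coefficient triple quoted in the abstract.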

arXiv:2411.04893 (https://arxiv.org/abs/2411.04893) [pdf, ps, other]
Subjects: quant-ph (Quantum Physics); cond-mat.stat-mech (Statistical Mechanics); cs.IT (Information Theory); math-ph (Mathematical Physics)
Title: Efficient quantum pseudorandomness under conservation laws
Authors: Zimu Li, Han Zheng, Zi-Wen Liu
Abstract: The efficiency of locally generating unitary designs, which capture statistical notions of quantum pseudorandomness, lies at the heart of wide-ranging areas in physics and quantum information technologies. While there are extensive potent methods and results for this problem, the evidently important setting where continuous symmetries or conservation laws (most notably U(1) and SU(d)) are involved is known to present fundamental difficulties. In particular, even the basic question of whether any local symmetric circuit can generate 2-designs efficiently (in time that grows at most polynomially in the system size) remains open with no circuit constructions provably known to do so, despite intensive efforts. In this work, we resolve this long-standing open problem for both U(1) and SU(d) symmetries by explicitly constructing local symmetric quantum circuits which we prove to converge to symmetric unitary 2-designs in polynomial time using a combination of representation theory, graph theory, and Markov chain methods. As a direct application, our constructions can be used to efficiently generate near-optimal random covariant quantum error-correcting codes, confirming a conjecture in [PRX Quantum 3, 020314 (2022)].
Submitted 7 November, 2024; originally announced November 2024.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 + 48 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.04794">arXiv:2411.04794</a> <span> [<a href="https://arxiv.org/pdf/2411.04794">pdf</a>, <a href="https://arxiv.org/format/2411.04794">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> AlignXIE: Improving Multilingual Information Extraction by Cross-Lingual Alignment </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zuo%2C+Y">Yuxin Zuo</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+W">Wenxuan Jiang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+W">Wenxuan Liu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zixuan Li</a>, <a href="/search/cs?searchtype=author&query=Bai%2C+L">Long Bai</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Hanbin Wang</a>, <a href="/search/cs?searchtype=author&query=Zeng%2C+Y">Yutao Zeng</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+X">Xiaolong Jin</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+J">Jiafeng Guo</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+X">Xueqi Cheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.04794v1-abstract-short" style="display: inline;"> Empirical evidence suggests that LLMs exhibit spontaneous cross-lingual alignment. Our findings suggest that although LLMs also demonstrate promising cross-lingual alignment in Information Extraction, there remains significant imbalance across languages, revealing an underlying deficiency in the IE alignment. To address this issue, we propose AlignXIE, a powerful code-based LLM that significantly… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04794v1-abstract-full').style.display = 'inline'; document.getElementById('2411.04794v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.04794v1-abstract-full" style="display: none;"> Empirical evidence suggests that LLMs exhibit spontaneous cross-lingual alignment. Our findings suggest that although LLMs also demonstrate promising cross-lingual alignment in Information Extraction, there remains significant imbalance across languages, revealing an underlying deficiency in the IE alignment. To address this issue, we propose AlignXIE, a powerful code-based LLM that significantly enhances cross-lingual IE alignment through two strategies. Firstly, AlignXIE formulates IE across different languages, especially non-English ones, as code generation tasks, standardizing the representation of various schemas using Python classes to ensure consistency of the same ontology in different languages and align the schema. 
Secondly, it incorporates an IE cross-lingual alignment phase through a translated instance prediction task proposed in this paper to align the extraction process, utilizing ParallelNER, an IE bilingual parallel dataset with 257,190 samples, generated by our proposed LLM-based automatic pipeline for IE parallel data construction, with manual annotation to ensure quality. Ultimately, we obtain AlignXIE through multilingual IE instruction tuning. Although it has not been trained on 9 unseen languages, AlignXIE surpasses ChatGPT by $30.17\%$ and SoTA by $20.03\%$, thereby demonstrating superior cross-lingual IE capabilities. Comprehensive evaluations on 63 IE benchmarks in Chinese and English under various settings demonstrate that AlignXIE significantly enhances cross-lingual and multilingual IE by boosting the IE alignment.
Submitted 7 November, 2024; originally announced November 2024.
Comments: Work in progress
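Since the abstract describes standardizing schemas as Python classes for code-style generation, here is a minimal sketch of that formulation; the class and field names are illustrative assumptions, not AlignXIE's actual schema definitions.

```python
# Illustrative schema-as-code formulation for cross-lingual IE.
# Class/field names are assumptions; only the general idea (one Python class
# per relation type, shared across languages) comes from the abstract.
from dataclasses import dataclass
from typing import List

@dataclass
class WorksFor:
    """Relation schema shared by all languages: (person, organization)."""
    person: str
    organization: str

def build_prompt(sentence: str) -> str:
    # The extractor LLM is asked to emit instances of the schema class, so
    # outputs for Chinese, English, etc. share one ontology.
    return (
        "class WorksFor: person: str; organization: str\n"
        f"sentence = {sentence!r}\n"
        "results: List[WorksFor] = "
    )

def parse(model_output: str) -> List[WorksFor]:
    # A real system would parse (or execute) the generated code; stubbed here.
    return []

print(build_prompt("Li Z. is a researcher at Example University."))
```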

arXiv:2411.04693 (https://arxiv.org/abs/2411.04693) [pdf]
Subjects: cs.CV (Computer Vision and Pattern Recognition); cs.AI (Artificial Intelligence)
Title: Reciprocal Point Learning Network with Large Electromagnetic Kernel for SAR Open-Set Recognition
Authors: Xiayang Xiao, Zhuoxuan Li, Ruyi Zhang, Jiacheng Chen, Haipeng Wang
Abstract: The limitations of existing Synthetic Aperture Radar (SAR) Automatic Target Recognition (ATR) methods lie in their confinement by the closed-environment assumption, hindering their effective and robust handling of unknown target categories in open environments. Open Set Recognition (OSR), a pivotal facet for algorithmic practicality, intends to categorize known classes while denoting unknown ones as "unknown." The chief challenge in OSR involves concurrently mitigating risks associated with generalizing features from a restricted set of known classes to numerous unknown samples and the open space exposure to potential unknown data. To enhance open-set SAR classification, a method called scattering kernel with reciprocal learning network is proposed. Initially, a feature learning framework is constructed based on reciprocal point learning (RPL), establishing a bounded space for potential unknown classes. This approach indirectly introduces unknown information into a learner confined to known classes, thereby acquiring more concise and discriminative representations. Subsequently, considering the variability in the imaging of targets at different angles and the discreteness of components in SAR images, a proposal is made to design convolutional kernels based on large-sized attribute scattering center models. This enhances the ability to extract intrinsic non-linear features and specific scattering characteristics in SAR images, thereby improving the discriminative features of the model and mitigating the impact of imaging variations on classification performance. Experiments on the MSTAR datasets substantiate the superior performance of the proposed approach called ASC-RPL over mainstream methods.
Submitted 7 November, 2024; originally announced November 2024.
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.04494">arXiv:2411.04494</a> <span> [<a href="https://arxiv.org/pdf/2411.04494">pdf</a>, <a href="https://arxiv.org/format/2411.04494">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Online Omnidirectional Jumping Trajectory Planning for Quadrupedal Robots on Uneven Terrains </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yue%2C+L">Linzhu Yue</a>, <a href="/search/cs?searchtype=author&query=Song%2C+Z">Zhitao Song</a>, <a href="/search/cs?searchtype=author&query=Dong%2C+J">Jinhu Dong</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zhongyu Li</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Hongbo Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Lingwei Zhang</a>, <a href="/search/cs?searchtype=author&query=Zeng%2C+X">Xuanqi Zeng</a>, <a href="/search/cs?searchtype=author&query=Sreenath%2C+K">Koushil Sreenath</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yun-hui Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.04494v2-abstract-short" style="display: inline;"> Natural terrain complexity often necessitates agile movements like jumping in animals to improve traversal efficiency. To enable similar capabilities in quadruped robots, complex real-time jumping maneuvers are required. Current research does not adequately address the problem of online omnidirectional jumping and neglects the robot's kinodynamic constraints during trajectory generation. This pape… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04494v2-abstract-full').style.display = 'inline'; document.getElementById('2411.04494v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.04494v2-abstract-full" style="display: none;"> Natural terrain complexity often necessitates agile movements like jumping in animals to improve traversal efficiency. To enable similar capabilities in quadruped robots, complex real-time jumping maneuvers are required. Current research does not adequately address the problem of online omnidirectional jumping and neglects the robot's kinodynamic constraints during trajectory generation. This paper proposes a general and complete cascade online optimization framework for omnidirectional jumping for quadruped robots. Our solution systematically encompasses jumping trajectory generation, a trajectory tracking controller, and a landing controller. It also incorporates environmental perception to navigate obstacles that standard locomotion cannot bypass, such as jumping from high platforms. We introduce a novel jumping plane to parameterize omnidirectional jumping motion and formulate a tightly coupled optimization problem accounting for the kinodynamic constraints, simultaneously optimizing CoM trajectory, Ground Reaction Forces (GRFs), and joint states. 
To meet the online requirements, we propose an accelerated evolutionary algorithm as the trajectory optimizer to address the complexity of kinodynamic constraints. To ensure stability and accuracy in environmental perception post-landing, we introduce a coarse-to-fine relocalization method that combines global Branch and Bound (BnB) search with Maximum a Posteriori (MAP) estimation for precise positioning during navigation and jumping. The proposed framework achieves jump trajectory generation in approximately 0.1 seconds with a warm start and has been successfully validated on two quadruped robots on uneven terrains. Additionally, we extend the framework's versatility to humanoid robots.
Submitted 9 November, 2024; v1 submitted 7 November, 2024; originally announced November 2024.
Comments: Submitted to IJRR

arXiv:2411.04335 (https://arxiv.org/abs/2411.04335) [pdf, other]
Subjects: cs.CV (Computer Vision and Pattern Recognition)
Title: GazeGen: Gaze-Driven User Interaction for Visual Content Generation
Authors: He-Yen Hsieh, Ziyun Li, Sai Qian Zhang, Wei-Te Mark Ting, Kao-Den Chang, Barbara De Salvo, Chiao Liu, H. T. Kung
Abstract: We present GazeGen, a user interaction system that generates visual content (images and videos) for locations indicated by the user's eye gaze. GazeGen allows intuitive manipulation of visual content by targeting regions of interest with gaze. Using advanced techniques in object detection and generative AI, GazeGen performs gaze-controlled image adding/deleting, repositioning, and surface style changes of image objects, and converts static images into videos. Central to GazeGen is the DFT Gaze (Distilled and Fine-Tuned Gaze) agent, an ultra-lightweight model with only 281K parameters, performing accurate real-time gaze predictions tailored to individual users' eyes on small edge devices. GazeGen is the first system to combine visual content generation with real-time gaze estimation, made possible exclusively by DFT Gaze. This real-time gaze estimation enables various visual content generation tasks, all controlled by the user's gaze. The input for DFT Gaze is the user's eye images, while the inputs for visual content generation are the user's view and the predicted gaze point from DFT Gaze. To achieve efficient gaze predictions, we derive the small model from a large model (10x larger) via novel knowledge distillation and personal adaptation techniques. We integrate knowledge distillation with a masked autoencoder, developing a compact yet powerful gaze estimation model. This model is further fine-tuned with Adapters, enabling highly accurate and personalized gaze predictions with minimal user input. DFT Gaze ensures low-latency and precise gaze tracking, supporting a wide range of gaze-driven tasks. We validate the performance of DFT Gaze on AEA and OpenEDS2020 benchmarks, demonstrating low angular gaze error and low latency on the edge device (Raspberry Pi 4). Furthermore, we describe applications of GazeGen, illustrating its versatility and effectiveness in various usage scenarios.
Submitted 17 November, 2024; v1 submitted 6 November, 2024; originally announced November 2024.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 10 figures</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Li%2C+Z&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Li%2C+Z&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Li%2C+Z&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Li%2C+Z&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Li%2C+Z&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Li%2C+Z&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">…</span></li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a 
href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>