CINXE.COM

Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;50 of 3,145 results for author: <span class="mathjax">Yang, J</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&amp;query=Yang%2C+J">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Yang, J"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Yang%2C+J&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Yang, J"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Yang%2C+J&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Yang%2C+J&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Yang%2C+J&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Yang%2C+J&amp;start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Yang%2C+J&amp;start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Yang%2C+J&amp;start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">&hellip;</span></li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.08848">arXiv:2502.08848</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.08848">pdf</a>, <a href="https://arxiv.org/format/2502.08848">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> SpeechCompass: Enhancing Mobile Captioning with Diarization and Directional Guidance via Multi-Microphone Localization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Dementyev%2C+A">Artem Dementyev</a>, <a href="/search/cs?searchtype=author&amp;query=Kavensky%2C+D">Dimitri Kavensky</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+S+J">Samuel J. Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Parvaix%2C+M">Mathieu Parvaix</a>, <a href="/search/cs?searchtype=author&amp;query=Lai%2C+C">Chiong Lai</a>, <a href="/search/cs?searchtype=author&amp;query=Olwal%2C+A">Alex Olwal</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.08848v1-abstract-short" style="display: inline;"> Speech-to-text capabilities on mobile devices have proven helpful for hearing and speech accessibility, language translation, note-taking, and meeting transcripts. However, our foundational large-scale survey (n=263) shows that the inability to distinguish and indicate speaker direction makes them challenging in group conversations. SpeechCompass addresses this limitation through real-time, multi-&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08848v1-abstract-full').style.display = 'inline'; document.getElementById('2502.08848v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.08848v1-abstract-full" style="display: none;"> Speech-to-text capabilities on mobile devices have proven helpful for hearing and speech accessibility, language translation, note-taking, and meeting transcripts. However, our foundational large-scale survey (n=263) shows that the inability to distinguish and indicate speaker direction makes them challenging in group conversations. SpeechCompass addresses this limitation through real-time, multi-microphone speech localization, where the direction of speech allows visual separation and guidance (e.g., arrows) in the user interface. We introduce efficient real-time audio localization algorithms and custom sound perception hardware running on a low-power microcontroller and four integrated microphones, which we characterize in technical evaluations. Informed by a large-scale survey (n=494), we conducted an in-person study of group conversations with eight frequent users of mobile speech-to-text, who provided feedback on five visualization styles. The value of diarization and visualizing localization was consistent across participants, with everyone agreeing on the value and potential of directional guidance for group conversations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08848v1-abstract-full').style.display = 'none'; document.getElementById('2502.08848v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to CHI 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.08659">arXiv:2502.08659</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.08659">pdf</a>, <a href="https://arxiv.org/format/2502.08659">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Deployment-friendly Lane-changing Intention Prediction Powered by Brain-inspired Spiking Neural Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Junjie Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Shen%2C+S">Shuqi Shen</a>, <a href="/search/cs?searchtype=author&amp;query=Zhong%2C+H">Hui Zhong</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Q">Qiming Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Lu%2C+H">Hongliang Lu</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+H">Hai Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.08659v1-abstract-short" style="display: inline;"> Accurate and real-time prediction of surrounding vehicles&#39; lane-changing intentions is a critical challenge in deploying safe and efficient autonomous driving systems in open-world scenarios. Existing high-performing methods remain hard to deploy due to their high computational cost, long training times, and excessive memory requirements. Here, we propose an efficient lane-changing intention predi&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08659v1-abstract-full').style.display = 'inline'; document.getElementById('2502.08659v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.08659v1-abstract-full" style="display: none;"> Accurate and real-time prediction of surrounding vehicles&#39; lane-changing intentions is a critical challenge in deploying safe and efficient autonomous driving systems in open-world scenarios. Existing high-performing methods remain hard to deploy due to their high computational cost, long training times, and excessive memory requirements. Here, we propose an efficient lane-changing intention prediction approach based on brain-inspired Spiking Neural Networks (SNN). By leveraging the event-driven nature of SNN, the proposed approach enables us to encode the vehicle&#39;s states in a more efficient manner. Comparison experiments conducted on HighD and NGSIM datasets demonstrate that our method significantly improves training efficiency and reduces deployment costs while maintaining comparable prediction accuracy. Particularly, compared to the baseline, our approach reduces training time by 75% and memory usage by 99.9%. These results validate the efficiency and reliability of our method in lane-changing predictions, highlighting its potential for safe and efficient autonomous driving systems while offering significant advantages in deployment, including reduced training time, lower memory usage, and faster inference. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08659v1-abstract-full').style.display = 'none'; document.getElementById('2502.08659v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.07904">arXiv:2502.07904</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.07904">pdf</a>, <a href="https://arxiv.org/format/2502.07904">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Intelligent Legal Assistant: An Interactive Clarification System for Legal Question Answering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yao%2C+R">Rujing Yao</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+Y">Yiquan Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+T">Tong Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+X">Xuhui Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+Y">Yuting Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+Y">Yang Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jiayin Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+C">Changlong Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+F">Fang Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+X">Xiaozhong Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.07904v1-abstract-short" style="display: inline;"> The rise of large language models has opened new avenues for users seeking legal advice. However, users often lack professional legal knowledge, which can lead to questions that omit critical information. This deficiency makes it challenging for traditional legal question-answering systems to accurately identify users&#39; actual needs, often resulting in imprecise or generalized advice. In this work,&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07904v1-abstract-full').style.display = 'inline'; document.getElementById('2502.07904v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.07904v1-abstract-full" style="display: none;"> The rise of large language models has opened new avenues for users seeking legal advice. However, users often lack professional legal knowledge, which can lead to questions that omit critical information. This deficiency makes it challenging for traditional legal question-answering systems to accurately identify users&#39; actual needs, often resulting in imprecise or generalized advice. In this work, we develop a legal question-answering system called Intelligent Legal Assistant, which interacts with users to precisely capture their needs. When a user poses a question, the system requests that the user select their geographical location to pinpoint the applicable laws. It then generates clarifying questions and options based on the key information missing from the user&#39;s initial question. This allows the user to select and provide the necessary details. Once all necessary information is provided, the system produces an in-depth legal analysis encompassing three aspects: overall conclusion, jurisprudential analysis, and resolution suggestions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07904v1-abstract-full').style.display = 'none'; document.getElementById('2502.07904v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.07813">arXiv:2502.07813</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.07813">pdf</a>, <a href="https://arxiv.org/format/2502.07813">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> CryptoX : Compositional Reasoning Evaluation of Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Shi%2C+J">Jiajun Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Wei%2C+C">Chaoren Wei</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+L">Liqun Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Z+M">Zekun Moore Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+C">Chenghao Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+G">Ge Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+S">Stephen Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Peng%2C+T">Tao Peng</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jian Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Wen%2C+Z">Zhoufutu Wen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.07813v1-abstract-short" style="display: inline;"> The compositional reasoning capacity has long been regarded as critical to the generalization and intelligence emergence of large language models LLMs. However, despite numerous reasoning-related benchmarks, the compositional reasoning capacity of LLMs is rarely studied or quantified in the existing benchmarks. In this paper, we introduce CryptoX, an evaluation framework that, for the first time,&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07813v1-abstract-full').style.display = 'inline'; document.getElementById('2502.07813v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.07813v1-abstract-full" style="display: none;"> The compositional reasoning capacity has long been regarded as critical to the generalization and intelligence emergence of large language models LLMs. However, despite numerous reasoning-related benchmarks, the compositional reasoning capacity of LLMs is rarely studied or quantified in the existing benchmarks. In this paper, we introduce CryptoX, an evaluation framework that, for the first time, combines existing benchmarks and cryptographic, to quantify the compositional reasoning capacity of LLMs. Building upon CryptoX, we construct CryptoBench, which integrates these principles into several benchmarks for systematic evaluation. We conduct detailed experiments on widely used open-source and closed-source LLMs using CryptoBench, revealing a huge gap between open-source and closed-source LLMs. We further conduct thorough mechanical interpretability experiments to reveal the inner mechanism of LLMs&#39; compositional reasoning, involving subproblem decomposition, subproblem inference, and summarizing subproblem conclusions. Through analysis based on CryptoBench, we highlight the value of independently studying compositional reasoning and emphasize the need to enhance the compositional reasoning capabilities of LLMs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07813v1-abstract-full').style.display = 'none'; document.getElementById('2502.07813v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.07487">arXiv:2502.07487</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.07487">pdf</a>, <a href="https://arxiv.org/format/2502.07487">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Multi-Agent Collaboration for Multilingual Code Instruction Tuning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jian Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+W">Wei Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jiaxi Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Miao%2C+Y">Yibo Miao</a>, <a href="/search/cs?searchtype=author&amp;query=Quan%2C+S">Shanghaoran Quan</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+Z">Zhenhe Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Peng%2C+Q">Qiyao Peng</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+L">Liqun Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+T">Tianyu Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+Z">Zeyu Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Hui%2C+B">Binyuan Hui</a>, <a href="/search/cs?searchtype=author&amp;query=Lin%2C+J">Junyang Lin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.07487v1-abstract-short" style="display: inline;"> Recent advancement in code understanding and generation demonstrates that code LLMs fine-tuned on a high-quality instruction dataset can gain powerful capabilities to address wide-ranging code-related tasks. However, most previous existing methods mainly view each programming language in isolation and ignore the knowledge transfer among different programming languages. To bridge the gap among diff&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07487v1-abstract-full').style.display = 'inline'; document.getElementById('2502.07487v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.07487v1-abstract-full" style="display: none;"> Recent advancement in code understanding and generation demonstrates that code LLMs fine-tuned on a high-quality instruction dataset can gain powerful capabilities to address wide-ranging code-related tasks. However, most previous existing methods mainly view each programming language in isolation and ignore the knowledge transfer among different programming languages. To bridge the gap among different programming languages, we introduce a novel multi-agent collaboration framework to enhance multilingual instruction tuning for code LLMs, where multiple language-specific intelligent agent components with generation memory work together to transfer knowledge from one language to another efficiently and effectively. Specifically, we first generate the language-specific instruction data from the code snippets and then provide the generated data as the seed data for language-specific agents. Multiple language-specific agents discuss and collaborate to formulate a new instruction and its corresponding solution (A new programming language or existing programming language), To further encourage the cross-lingual transfer, each agent stores its generation history as memory and then summarizes its merits and faults. Finally, the high-quality multilingual instruction data is used to encourage knowledge transfer among different programming languages to train Qwen2.5-xCoder. Experimental results on multilingual programming benchmarks demonstrate the superior performance of Qwen2.5-xCoder in sharing common knowledge, highlighting its potential to reduce the cross-lingual gap. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07487v1-abstract-full').style.display = 'none'; document.getElementById('2502.07487v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.07293">arXiv:2502.07293</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.07293">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Materials Science">cond-mat.mtrl-sci</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Global Universal Scaling and Ultra-Small Parameterization in Machine Learning Interatomic Potentials with Super-Linearity </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Hu%2C+Y">Yanxiao Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Sheng%2C+Y">Ye Sheng</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+J">Jing Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+X">Xiaoxin Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+Y">Yuyan Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+M">Mingqiang Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+Y">Yabei Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Ye%2C+C">Caichao Ye</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jiong Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+W">Wenqing Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.07293v1-abstract-short" style="display: inline;"> Using machine learning (ML) to construct interatomic interactions and thus potential energy surface (PES) has become a common strategy for materials design and simulations. However, those current models of machine learning interatomic potential (MLIP) provide no relevant physical constrains, and thus may owe intrinsic out-of-domain difficulty which underlies the challenges of model generalizabilit&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07293v1-abstract-full').style.display = 'inline'; document.getElementById('2502.07293v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.07293v1-abstract-full" style="display: none;"> Using machine learning (ML) to construct interatomic interactions and thus potential energy surface (PES) has become a common strategy for materials design and simulations. However, those current models of machine learning interatomic potential (MLIP) provide no relevant physical constrains, and thus may owe intrinsic out-of-domain difficulty which underlies the challenges of model generalizability and physical scalability. Here, by incorporating physics-informed Universal-Scaling law and nonlinearity-embedded interaction function, we develop a Super-linear MLIP with both Ultra-Small parameterization and greatly expanded expressive capability, named SUS2-MLIP. Due to the global scaling rooting in universal equation of state (UEOS), SUS2-MLIP not only has significantly-reduced parameters by decoupling the element space from coordinate space, but also naturally outcomes the out-of-domain difficulty and endows the potentials with inherent generalizability and scalability even with relatively small training dataset. The nonlinearity-enbeding transformation for interaction function expands the expressive capability and make the potentials super-linear. The SUS2-MLIP outperforms the state-of-the-art MLIP models with its exceptional computational efficiency especially for multiple-element materials and physical scalability in property prediction. This work not only presents a highly-efficient universal MLIP model but also sheds light on incorporating physical constraints into artificial-intelligence-aided materials simulation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07293v1-abstract-full').style.display = 'none'; document.getElementById('2502.07293v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.07289">arXiv:2502.07289</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.07289">pdf</a>, <a href="https://arxiv.org/format/2502.07289">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Learning Inverse Laplacian Pyramid for Progressive Depth Completion </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wang%2C+K">Kun Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Yan%2C+Z">Zhiqiang Yan</a>, <a href="/search/cs?searchtype=author&amp;query=Fan%2C+J">Junkai Fan</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jun Li</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jian Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.07289v1-abstract-short" style="display: inline;"> Depth completion endeavors to reconstruct a dense depth map from sparse depth measurements, leveraging the information provided by a corresponding color image. Existing approaches mostly hinge on single-scale propagation strategies that iteratively ameliorate initial coarse depth estimates through pixel-level message passing. Despite their commendable outcomes, these techniques are frequently hamp&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07289v1-abstract-full').style.display = 'inline'; document.getElementById('2502.07289v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.07289v1-abstract-full" style="display: none;"> Depth completion endeavors to reconstruct a dense depth map from sparse depth measurements, leveraging the information provided by a corresponding color image. Existing approaches mostly hinge on single-scale propagation strategies that iteratively ameliorate initial coarse depth estimates through pixel-level message passing. Despite their commendable outcomes, these techniques are frequently hampered by computational inefficiencies and a limited grasp of scene context. To circumvent these challenges, we introduce LP-Net, an innovative framework that implements a multi-scale, progressive prediction paradigm based on Laplacian Pyramid decomposition. Diverging from propagation-based approaches, LP-Net initiates with a rudimentary, low-resolution depth prediction to encapsulate the global scene context, subsequently refining this through successive upsampling and the reinstatement of high-frequency details at incremental scales. We have developed two novel modules to bolster this strategy: 1) the Multi-path Feature Pyramid module, which segregates feature maps into discrete pathways, employing multi-scale transformations to amalgamate comprehensive spatial information, and 2) the Selective Depth Filtering module, which dynamically learns to apply both smoothness and sharpness filters to judiciously mitigate noise while accentuating intricate details. By integrating these advancements, LP-Net not only secures state-of-the-art (SOTA) performance across both outdoor and indoor benchmarks such as KITTI, NYUv2, and TOFDC, but also demonstrates superior computational efficiency. At the time of submission, LP-Net ranks 1st among all peer-reviewed methods on the official KITTI leaderboard. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07289v1-abstract-full').style.display = 'none'; document.getElementById('2502.07289v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.07225">arXiv:2502.07225</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.07225">pdf</a>, <a href="https://arxiv.org/format/2502.07225">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> CAT: Contrastive Adversarial Training for Evaluating the Robustness of Protective Perturbations in Latent Diffusion Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Peng%2C+S">Sen Peng</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+M">Mingyue Wang</a>, <a href="/search/cs?searchtype=author&amp;query=He%2C+J">Jianfei He</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jijia Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Jia%2C+X">Xiaohua Jia</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.07225v1-abstract-short" style="display: inline;"> Latent diffusion models have recently demonstrated superior capabilities in many downstream image synthesis tasks. However, customization of latent diffusion models using unauthorized data can severely compromise the privacy and intellectual property rights of data owners. Adversarial examples as protective perturbations have been developed to defend against unauthorized data usage by introducing&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07225v1-abstract-full').style.display = 'inline'; document.getElementById('2502.07225v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.07225v1-abstract-full" style="display: none;"> Latent diffusion models have recently demonstrated superior capabilities in many downstream image synthesis tasks. However, customization of latent diffusion models using unauthorized data can severely compromise the privacy and intellectual property rights of data owners. Adversarial examples as protective perturbations have been developed to defend against unauthorized data usage by introducing imperceptible noise to customization samples, preventing diffusion models from effectively learning them. In this paper, we first reveal that the primary reason adversarial examples are effective as protective perturbations in latent diffusion models is the distortion of their latent representations, as demonstrated through qualitative and quantitative experiments. We then propose the Contrastive Adversarial Training (CAT) utilizing adapters as an adaptive attack against these protection methods, highlighting their lack of robustness. Extensive experiments demonstrate that our CAT method significantly reduces the effectiveness of protective perturbations in customization configurations, urging the community to reconsider and enhance the robustness of existing protective perturbation methods. Code is available at \hyperlink{here}{https://github.com/senp98/CAT}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07225v1-abstract-full').style.display = 'none'; document.getElementById('2502.07225v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.06876">arXiv:2502.06876</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.06876">pdf</a>, <a href="https://arxiv.org/format/2502.06876">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Mix Data or Merge Models? Balancing the Helpfulness, Honesty, and Harmlessness of Large Language Model via Model Merging </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jinluan Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Jin%2C+D">Dingnan Jin</a>, <a href="/search/cs?searchtype=author&amp;query=Tang%2C+A">Anke Tang</a>, <a href="/search/cs?searchtype=author&amp;query=Shen%2C+L">Li Shen</a>, <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+D">Didi Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Z">Zhengyu Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+D">Daixin Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+Q">Qing Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Z">Zhiqiang Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+J">Jun Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+F">Fei Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Kuang%2C+K">Kun Kuang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.06876v2-abstract-short" style="display: inline;"> Achieving balanced alignment of large language models (LLMs) in terms of Helpfulness, Honesty, and Harmlessness (3H optimization) constitutes a cornerstone of responsible AI, with existing methods like data mixture strategies facing limitations including reliance on expert knowledge and conflicting optimization signals. While model merging offers a promising alternative by integrating specialized&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06876v2-abstract-full').style.display = 'inline'; document.getElementById('2502.06876v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.06876v2-abstract-full" style="display: none;"> Achieving balanced alignment of large language models (LLMs) in terms of Helpfulness, Honesty, and Harmlessness (3H optimization) constitutes a cornerstone of responsible AI, with existing methods like data mixture strategies facing limitations including reliance on expert knowledge and conflicting optimization signals. While model merging offers a promising alternative by integrating specialized models, its potential for 3H optimization remains underexplored. This paper establishes the first comprehensive benchmark for model merging in 3H-aligned LLMs, systematically evaluating 15 methods (12 training-free merging and 3 data mixture techniques) across 10 datasets associated with 5 annotation dimensions, 2 LLM families, and 2 training paradigms. Our analysis reveals three pivotal insights: (i) previously overlooked collaborative/conflicting relationships among 3H dimensions, (ii) the consistent superiority of model merging over data mixture approaches in balancing alignment trade-offs, and (iii) the critical role of parameter-level conflict resolution through redundant component pruning and outlier mitigation. Building on these findings, we propose R-TSVM, a Reweighting-enhanced Task Singular Vector Merging method that incorporates outlier-aware parameter weighting and sparsity-adaptive rank selection strategies adapted to the heavy-tailed parameter distribution and sparsity for LLMs, further improving LLM alignment across multiple evaluations. We release our trained models for further exploration. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06876v2-abstract-full').style.display = 'none'; document.getElementById('2502.06876v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.06589">arXiv:2502.06589</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.06589">pdf</a>, <a href="https://arxiv.org/format/2502.06589">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Hephaestus: Improving Fundamental Agent Capabilities of Large Language Models through Continual Pre-Training </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhuang%2C+Y">Yuchen Zhuang</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jingfeng Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Jiang%2C+H">Haoming Jiang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+X">Xin Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+K">Kewei Cheng</a>, <a href="/search/cs?searchtype=author&amp;query=Lokegaonkar%2C+S">Sanket Lokegaonkar</a>, <a href="/search/cs?searchtype=author&amp;query=Gao%2C+Y">Yifan Gao</a>, <a href="/search/cs?searchtype=author&amp;query=Ping%2C+Q">Qing Ping</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+T">Tianyi Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+B">Binxuan Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Z">Zheng Li</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Z">Zhengyang Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+P">Pei Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+R">Ruijie Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+R">Rongzhi Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Zalmout%2C+N">Nasser Zalmout</a>, <a href="/search/cs?searchtype=author&amp;query=Nigam%2C+P">Priyanka Nigam</a>, <a href="/search/cs?searchtype=author&amp;query=Yin%2C+B">Bing Yin</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+C">Chao Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.06589v1-abstract-short" style="display: inline;"> Due to the scarcity of agent-oriented pre-training data, LLM-based autonomous agents typically rely on complex prompting or extensive fine-tuning, which often fails to introduce new capabilities while preserving strong generalizability. We introduce Hephaestus-Forge, the first large-scale pre-training corpus designed to enhance the fundamental capabilities of LLM agents in API function calling, in&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06589v1-abstract-full').style.display = 'inline'; document.getElementById('2502.06589v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.06589v1-abstract-full" style="display: none;"> Due to the scarcity of agent-oriented pre-training data, LLM-based autonomous agents typically rely on complex prompting or extensive fine-tuning, which often fails to introduce new capabilities while preserving strong generalizability. We introduce Hephaestus-Forge, the first large-scale pre-training corpus designed to enhance the fundamental capabilities of LLM agents in API function calling, intrinsic reasoning and planning, and adapting to environmental feedback. Hephaestus-Forge comprises 103B agent-specific data encompassing 76,537 APIs, including both tool documentation to introduce knowledge of API functions and function calling trajectories to strengthen intrinsic reasoning. To explore effective training protocols, we investigate scaling laws to identify the optimal recipe in data mixing ratios. By continual pre-training on Hephaestus-Forge, Hephaestus outperforms small- to medium-scale open-source LLMs and rivals commercial LLMs on three agent benchmarks, demonstrating the effectiveness of our pre-training corpus in enhancing fundamental agentic capabilities and generalization of LLMs to new tasks or environments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06589v1-abstract-full').style.display = 'none'; document.getElementById('2502.06589v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to NAACL 2025 main conference</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.06583">arXiv:2502.06583</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.06583">pdf</a>, <a href="https://arxiv.org/format/2502.06583">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Adaptive Perception for Unified Visual Multi-modal Object Tracking </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Hu%2C+X">Xiantao Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhong%2C+B">Bineng Zhong</a>, <a href="/search/cs?searchtype=author&amp;query=Liang%2C+Q">Qihua Liang</a>, <a href="/search/cs?searchtype=author&amp;query=Mo%2C+Z">Zhiyi Mo</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+L">Liangtao Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Tai%2C+Y">Ying Tai</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jian Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.06583v1-abstract-short" style="display: inline;"> Recently, many multi-modal trackers prioritize RGB as the dominant modality, treating other modalities as auxiliary, and fine-tuning separately various multi-modal tasks. This imbalance in modality dependence limits the ability of methods to dynamically utilize complementary information from each modality in complex scenarios, making it challenging to fully perceive the advantages of multi-modal.&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06583v1-abstract-full').style.display = 'inline'; document.getElementById('2502.06583v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.06583v1-abstract-full" style="display: none;"> Recently, many multi-modal trackers prioritize RGB as the dominant modality, treating other modalities as auxiliary, and fine-tuning separately various multi-modal tasks. This imbalance in modality dependence limits the ability of methods to dynamically utilize complementary information from each modality in complex scenarios, making it challenging to fully perceive the advantages of multi-modal. As a result, a unified parameter model often underperforms in various multi-modal tracking tasks. To address this issue, we propose APTrack, a novel unified tracker designed for multi-modal adaptive perception. Unlike previous methods, APTrack explores a unified representation through an equal modeling strategy. This strategy allows the model to dynamically adapt to various modalities and tasks without requiring additional fine-tuning between different tasks. Moreover, our tracker integrates an adaptive modality interaction (AMI) module that efficiently bridges cross-modality interactions by generating learnable tokens. Experiments conducted on five diverse multi-modal datasets (RGBT234, LasHeR, VisEvent, DepthTrack, and VOT-RGBD2022) demonstrate that APTrack not only surpasses existing state-of-the-art unified multi-modal trackers but also outperforms trackers designed for specific multi-modal tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06583v1-abstract-full').style.display = 'none'; document.getElementById('2502.06583v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.06498">arXiv:2502.06498</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.06498">pdf</a>, <a href="https://arxiv.org/format/2502.06498">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Decision Boundary Optimization-Informed Domain Adaptation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Luo%2C+L">Lingkun Luo</a>, <a href="/search/cs?searchtype=author&amp;query=Hu%2C+S">Shiqiang Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jie Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+L">Liming Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.06498v1-abstract-short" style="display: inline;"> Maximum Mean Discrepancy (MMD) is widely used in a number of domain adaptation (DA) methods and shows its effectiveness in aligning data distributions across domains. However, in previous DA research, MMD-based DA methods focus mostly on distribution alignment, and ignore to optimize the decision boundary for classification-aware DA, thereby falling short in reducing the DA upper error bound. In t&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06498v1-abstract-full').style.display = 'inline'; document.getElementById('2502.06498v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.06498v1-abstract-full" style="display: none;"> Maximum Mean Discrepancy (MMD) is widely used in a number of domain adaptation (DA) methods and shows its effectiveness in aligning data distributions across domains. However, in previous DA research, MMD-based DA methods focus mostly on distribution alignment, and ignore to optimize the decision boundary for classification-aware DA, thereby falling short in reducing the DA upper error bound. In this paper, we propose a strengthened MMD measurement, namely, Decision Boundary optimization-informed MMD (DB-MMD), which enables MMD to carefully take into account the decision boundaries, thereby simultaneously optimizing the distribution alignment and cross-domain classifier within a hybrid framework, and leading to a theoretical bound guided DA. We further seamlessly embed the proposed DB-MMD measurement into several popular DA methods, e.g., MEDA, DGA-DA, to demonstrate its effectiveness w.r.t different experimental settings. We carry out comprehensive experiments using 8 standard DA datasets. The experimental results show that the DB-MMD enforced DA methods improve their baseline models using plain vanilla MMD, with a margin that can be as high as 9.5. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06498v1-abstract-full').style.display = 'none'; document.getElementById('2502.06498v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05783">arXiv:2502.05783</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.05783">pdf</a>, <a href="https://arxiv.org/format/2502.05783">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> WatchGuardian: Enabling User-Defined Personalized Just-in-Time Intervention on Smartwatch </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lei%2C+Y">Ying Lei</a>, <a href="/search/cs?searchtype=author&amp;query=Cao%2C+Y">Yancheng Cao</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+W">Will Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Dong%2C+Y">Yuanzhe Dong</a>, <a href="/search/cs?searchtype=author&amp;query=Yin%2C+C">Changchang Yin</a>, <a href="/search/cs?searchtype=author&amp;query=Cao%2C+W">Weidan Cao</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+P">Ping Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jingzhen Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Yao%2C+B">Bingsheng Yao</a>, <a href="/search/cs?searchtype=author&amp;query=Peng%2C+Y">Yifan Peng</a>, <a href="/search/cs?searchtype=author&amp;query=Weng%2C+C">Chunhua Weng</a>, <a href="/search/cs?searchtype=author&amp;query=Auerbach%2C+R">Randy Auerbach</a>, <a href="/search/cs?searchtype=author&amp;query=Mamykina%2C+L">Lena Mamykina</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+D">Dakuo Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Yuntao Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+X">Xuhai Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05783v1-abstract-short" style="display: inline;"> While just-in-time interventions (JITIs) have effectively targeted common health behaviors, individuals often have unique needs to intervene in personal undesirable actions that can negatively affect physical, mental, and social well-being. We present WatchGuardian, a smartwatch-based JITI system that empowers users to define custom interventions for these personal actions with a small number of s&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05783v1-abstract-full').style.display = 'inline'; document.getElementById('2502.05783v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05783v1-abstract-full" style="display: none;"> While just-in-time interventions (JITIs) have effectively targeted common health behaviors, individuals often have unique needs to intervene in personal undesirable actions that can negatively affect physical, mental, and social well-being. We present WatchGuardian, a smartwatch-based JITI system that empowers users to define custom interventions for these personal actions with a small number of samples. For the model to detect new actions based on limited new data samples, we developed a few-shot learning pipeline that finetuned a pre-trained inertial measurement unit (IMU) model on public hand-gesture datasets. We then designed a data augmentation and synthesis process to train additional classification layers for customization. Our offline evaluation with 26 participants showed that with three, five, and ten examples, our approach achieved an average accuracy of 76.8%, 84.7%, and 87.7%, and an F1 score of 74.8%, 84.2%, and 87.2% We then conducted a four-hour intervention study to compare WatchGuardian against a rule-based intervention. Our results demonstrated that our system led to a significant reduction by 64.0 +- 22.6% in undesirable actions, substantially outperforming the baseline by 29.0%. Our findings underscore the effectiveness of a customizable, AI-driven JITI system for individuals in need of behavioral intervention in personal undesirable actions. We envision that our work can inspire broader applications of user-defined personalized intervention with advanced AI solutions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05783v1-abstract-full').style.display = 'none'; document.getElementById('2502.05783v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Under submission</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 68U35 <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> H.5.2; I.2.1 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05701">arXiv:2502.05701</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.05701">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> TOKON: TOKenization-Optimized Normalization for time series analysis with a large language model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Janghoon Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05701v1-abstract-short" style="display: inline;"> While large language models have rapidly evolved towards general artificial intelligence, their versatility in analyzing time series data remains limited. To address this limitation, we propose a novel normalization technique that considers the inherent nature of tokenization. The proposed Tokenization-Optimized Normalization (TOKON) simplifies time series data by representing each element with a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05701v1-abstract-full').style.display = 'inline'; document.getElementById('2502.05701v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05701v1-abstract-full" style="display: none;"> While large language models have rapidly evolved towards general artificial intelligence, their versatility in analyzing time series data remains limited. To address this limitation, we propose a novel normalization technique that considers the inherent nature of tokenization. The proposed Tokenization-Optimized Normalization (TOKON) simplifies time series data by representing each element with a single token, effectively reducing the number of tokens by 2 to 3 times. Additionally, we introduce a novel prompt for time series forecasting, termed Time Series Forecasting with Care (TFSC), to further enhance forecasting performance. Experimental results demonstrate that TOKON improves root mean square error (RMSE) for multi-step forecasting by approximately 7% to 18%, depending on the dataset and prompting method. Furthermore, TFSC, when used in conjunction with TOKON, shows additional improvements in forecasting accuracy for certain datasets <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05701v1-abstract-full').style.display = 'none'; document.getElementById('2502.05701v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05699">arXiv:2502.05699</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.05699">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Context information can be more important than reasoning for time series forecasting with a large language model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Janghoon Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05699v1-abstract-short" style="display: inline;"> With the evolution of large language models (LLMs), there is growing interest in leveraging LLMs for time series tasks. In this paper, we explore the characteristics of LLMs for time series forecasting by considering various existing and proposed prompting techniques. Forecasting for both short and long time series was evaluated. Our findings indicate that no single prompting method is universally&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05699v1-abstract-full').style.display = 'inline'; document.getElementById('2502.05699v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05699v1-abstract-full" style="display: none;"> With the evolution of large language models (LLMs), there is growing interest in leveraging LLMs for time series tasks. In this paper, we explore the characteristics of LLMs for time series forecasting by considering various existing and proposed prompting techniques. Forecasting for both short and long time series was evaluated. Our findings indicate that no single prompting method is universally applicable. It was also observed that simply providing proper context information related to the time series, without additional reasoning prompts, can achieve performance comparable to the best-performing prompt for each case. From this observation, it is expected that providing proper context information can be more crucial than a prompt for specific reasoning in time series forecasting. Several weaknesses in prompting for time series forecasting were also identified. First, LLMs often fail to follow the procedures described by the prompt. Second, when reasoning steps involve simple algebraic calculations with several operands, LLMs often fail to calculate accurately. Third, LLMs sometimes misunderstand the semantics of prompts, resulting in incomplete responses. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05699v1-abstract-full').style.display = 'none'; document.getElementById('2502.05699v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05271">arXiv:2502.05271</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.05271">pdf</a>, <a href="https://arxiv.org/format/2502.05271">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> RobotMover: Learning to Move Large Objects by Imitating the Dynamic Chain </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+T">Tianyu Li</a>, <a href="/search/cs?searchtype=author&amp;query=Truong%2C+J">Joanne Truong</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jimmy Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Clegg%2C+A">Alexander Clegg</a>, <a href="/search/cs?searchtype=author&amp;query=Rai%2C+A">Akshara Rai</a>, <a href="/search/cs?searchtype=author&amp;query=Ha%2C+S">Sehoon Ha</a>, <a href="/search/cs?searchtype=author&amp;query=Puig%2C+X">Xavier Puig</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05271v1-abstract-short" style="display: inline;"> Moving large objects, such as furniture, is a critical capability for robots operating in human environments. This task presents significant challenges due to two key factors: the need to synchronize whole-body movements to prevent collisions between the robot and the object, and the under-actuated dynamics arising from the substantial size and weight of the objects. These challenges also complica&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05271v1-abstract-full').style.display = 'inline'; document.getElementById('2502.05271v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05271v1-abstract-full" style="display: none;"> Moving large objects, such as furniture, is a critical capability for robots operating in human environments. This task presents significant challenges due to two key factors: the need to synchronize whole-body movements to prevent collisions between the robot and the object, and the under-actuated dynamics arising from the substantial size and weight of the objects. These challenges also complicate performing these tasks via teleoperation. In this work, we introduce \method, a generalizable learning framework that leverages human-object interaction demonstrations to enable robots to perform large object manipulation tasks. Central to our approach is the Dynamic Chain, a novel representation that abstracts human-object interactions so that they can be retargeted to robotic morphologies. The Dynamic Chain is a spatial descriptor connecting the human and object root position via a chain of nodes, which encode the position and velocity of different interaction keypoints. We train policies in simulation using Dynamic-Chain-based imitation rewards and domain randomization, enabling zero-shot transfer to real-world settings without fine-tuning. Our approach outperforms both learning-based methods and teleoperation baselines across six evaluation metrics when tested on three distinct object types, both in simulation and on physical hardware. Furthermore, we successfully apply the learned policies to real-world tasks, such as moving a trash cart and rearranging chairs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05271v1-abstract-full').style.display = 'none'; document.getElementById('2502.05271v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05034">arXiv:2502.05034</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.05034">pdf</a>, <a href="https://arxiv.org/format/2502.05034">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> MindAligner: Explicit Brain Functional Alignment for Cross-Subject Visual Decoding from Limited fMRI Data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Dai%2C+Y">Yuqin Dai</a>, <a href="/search/cs?searchtype=author&amp;query=Yao%2C+Z">Zhouheng Yao</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+C">Chunfeng Song</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+Q">Qihao Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Mai%2C+W">Weijian Mai</a>, <a href="/search/cs?searchtype=author&amp;query=Peng%2C+K">Kunyu Peng</a>, <a href="/search/cs?searchtype=author&amp;query=Lu%2C+S">Shuai Lu</a>, <a href="/search/cs?searchtype=author&amp;query=Ouyang%2C+W">Wanli Ouyang</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jian Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+J">Jiamin Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05034v1-abstract-short" style="display: inline;"> Brain decoding aims to reconstruct visual perception of human subject from fMRI signals, which is crucial for understanding brain&#39;s perception mechanisms. Existing methods are confined to the single-subject paradigm due to substantial brain variability, which leads to weak generalization across individuals and incurs high training costs, exacerbated by limited availability of fMRI data. To address&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05034v1-abstract-full').style.display = 'inline'; document.getElementById('2502.05034v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05034v1-abstract-full" style="display: none;"> Brain decoding aims to reconstruct visual perception of human subject from fMRI signals, which is crucial for understanding brain&#39;s perception mechanisms. Existing methods are confined to the single-subject paradigm due to substantial brain variability, which leads to weak generalization across individuals and incurs high training costs, exacerbated by limited availability of fMRI data. To address these challenges, we propose MindAligner, an explicit functional alignment framework for cross-subject brain decoding from limited fMRI data. The proposed MindAligner enjoys several merits. First, we learn a Brain Transfer Matrix (BTM) that projects the brain signals of an arbitrary new subject to one of the known subjects, enabling seamless use of pre-trained decoding models. Second, to facilitate reliable BTM learning, a Brain Functional Alignment module is proposed to perform soft cross-subject brain alignment under different visual stimuli with a multi-level brain alignment loss, uncovering fine-grained functional correspondences with high interpretability. Experiments indicate that MindAligner not only outperforms existing methods in visual decoding under data-limited conditions, but also provides valuable neuroscience insights in cross-subject functional analysis. The code will be made publicly available. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05034v1-abstract-full').style.display = 'none'; document.getElementById('2502.05034v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05017">arXiv:2502.05017</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.05017">pdf</a>, <a href="https://arxiv.org/format/2502.05017">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="General Economics">econ.GN</span> </div> </div> <p class="title is-5 mathjax"> Bridging Voting and Deliberation with Algorithms: Field Insights from vTaiwan and Kultur Komitee </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J+C">Joshua C. Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Bachmann%2C+F">Fynn Bachmann</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05017v1-abstract-short" style="display: inline;"> Democratic processes increasingly aim to integrate large-scale voting with face-to-face deliberation, addressing the challenge of reconciling individual preferences with collective decision-making. This work introduces new methods that use algorithms and computational tools to bridge online voting with face-to-face deliberation, tested in two real-world scenarios: Kultur Komitee 2024 (KK24) and vT&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05017v1-abstract-full').style.display = 'inline'; document.getElementById('2502.05017v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05017v1-abstract-full" style="display: none;"> Democratic processes increasingly aim to integrate large-scale voting with face-to-face deliberation, addressing the challenge of reconciling individual preferences with collective decision-making. This work introduces new methods that use algorithms and computational tools to bridge online voting with face-to-face deliberation, tested in two real-world scenarios: Kultur Komitee 2024 (KK24) and vTaiwan. These case studies highlight the practical applications and impacts of the proposed methods. We present three key contributions: (1) Radial Clustering for Preference Based Subgroups, which enables both in-depth and broad discussions in deliberative settings by computing homogeneous and heterogeneous group compositions with balanced and adjustable group sizes; (2) Human-in-the-loop MES, a practical method that enhances the Method of Equal Shares (MES) algorithm with real-time digital feedback. This builds algorithmic trust by giving participants full control over how much decision-making is delegated to the voting aggregation algorithm as compared to deliberation; and (3) the ReadTheRoom deliberation method, which uses opinion space mapping to identify agreement and divergence, along with spectrum-based preference visualisation to track opinion shifts during deliberation. This approach enhances transparency by clarifying collective sentiment and fosters collaboration by encouraging participants to engage constructively with differing perspectives. By introducing these actionable frameworks, this research extends in-person deliberation with scalable digital methods that address the complexities of modern decision-making in participatory processes. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05017v1-abstract-full').style.display = 'none'; document.getElementById('2502.05017v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Submitted to ACM Conference on Fairness, Accountability, and Transparency (FAccT) 2025</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 91B14; 91B12; 91A12; 68T01; 68T20; 68U35 <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> H.5.3; I.2.0; I.2.11; J.1; G.2.0; G.2.2; K.4.1; K.4.3 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.04797">arXiv:2502.04797</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.04797">pdf</a>, <a href="https://arxiv.org/format/2502.04797">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Self-Rationalization in the Wild: A Large Scale Out-of-Distribution Evaluation on NLI-related tasks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jing Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Glockner%2C+M">Max Glockner</a>, <a href="/search/cs?searchtype=author&amp;query=Rocha%2C+A">Anderson Rocha</a>, <a href="/search/cs?searchtype=author&amp;query=Gurevych%2C+I">Iryna Gurevych</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.04797v1-abstract-short" style="display: inline;"> Free-text explanations are expressive and easy to understand, but many datasets lack annotated explanation data, making it challenging to train models for explainable predictions. To address this, we investigate how to use existing explanation datasets for self-rationalization and evaluate models&#39; out-of-distribution (OOD) performance. We fine-tune T5-Large and OLMo-7B models and assess the impact&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04797v1-abstract-full').style.display = 'inline'; document.getElementById('2502.04797v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.04797v1-abstract-full" style="display: none;"> Free-text explanations are expressive and easy to understand, but many datasets lack annotated explanation data, making it challenging to train models for explainable predictions. To address this, we investigate how to use existing explanation datasets for self-rationalization and evaluate models&#39; out-of-distribution (OOD) performance. We fine-tune T5-Large and OLMo-7B models and assess the impact of fine-tuning data quality, the number of fine-tuning samples, and few-shot selection methods. The models are evaluated on 19 diverse OOD datasets across three tasks: natural language inference (NLI), fact-checking, and hallucination detection in abstractive summarization. For the generated explanation evaluation, we conduct a human study on 13 selected models and study its correlation with the Acceptability score (T5-11B) and three other LLM-based reference-free metrics. Human evaluation shows that the Acceptability score correlates most strongly with human judgments, demonstrating its effectiveness in evaluating free-text explanations. Our findings reveal: 1) few annotated examples effectively adapt models for OOD explanation generation; 2) compared to sample selection strategies, fine-tuning data source has a larger impact on OOD performance; and 3) models with higher label prediction accuracy tend to produce better explanations, as reflected by higher Acceptability scores. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04797v1-abstract-full').style.display = 'none'; document.getElementById('2502.04797v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at TACL; pre-MIT Press publication version</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.04722">arXiv:2502.04722</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.04722">pdf</a>, <a href="https://arxiv.org/format/2502.04722">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Singing Voice Conversion with Accompaniment Using Self-Supervised Representation-Based Melody Features </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chen%2C+W">Wei Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Sha%2C+B">Binzhu Sha</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jing Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Z">Zhuo Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Fan%2C+F">Fan Fan</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+Z">Zhiyong Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.04722v1-abstract-short" style="display: inline;"> Melody preservation is crucial in singing voice conversion (SVC). However, in many scenarios, audio is often accompanied with background music (BGM), which can cause audio distortion and interfere with the extraction of melody and other key features, significantly degrading SVC performance. Previous methods have attempted to address this by using more robust neural network-based melody extractors,&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04722v1-abstract-full').style.display = 'inline'; document.getElementById('2502.04722v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.04722v1-abstract-full" style="display: none;"> Melody preservation is crucial in singing voice conversion (SVC). However, in many scenarios, audio is often accompanied with background music (BGM), which can cause audio distortion and interfere with the extraction of melody and other key features, significantly degrading SVC performance. Previous methods have attempted to address this by using more robust neural network-based melody extractors, but their performance drops sharply in the presence of complex accompaniment. Other approaches involve performing source separation before conversion, but this often introduces noticeable artifacts, leading to a significant drop in conversion quality and increasing the user&#39;s operational costs. To address these issues, we introduce a novel SVC method that uses self-supervised representation-based melody features to improve melody modeling accuracy in the presence of BGM. In our experiments, we compare the effectiveness of different self-supervised learning (SSL) models for melody extraction and explore for the first time how SSL benefits the task of melody extraction. The experimental results demonstrate that our proposed SVC model significantly outperforms existing baseline methods in terms of melody accuracy and shows higher similarity and naturalness in both subjective and objective evaluations across noisy and clean audio environments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04722v1-abstract-full').style.display = 'none'; document.getElementById('2502.04722v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ICASSP2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.04675">arXiv:2502.04675</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.04675">pdf</a>, <a href="https://arxiv.org/format/2502.04675">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Scalable Oversight for Superhuman AI via Recursive Self-Critiquing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wen%2C+X">Xueru Wen</a>, <a href="/search/cs?searchtype=author&amp;query=Lou%2C+J">Jie Lou</a>, <a href="/search/cs?searchtype=author&amp;query=Lu%2C+X">Xinyu Lu</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Junjie Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Y">Yanjiang Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Lu%2C+Y">Yaojie Lu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+D">Debing Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=XingYu"> XingYu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.04675v1-abstract-short" style="display: inline;"> As AI capabilities increasingly surpass human proficiency in complex tasks, current alignment techniques including SFT and RLHF face fundamental challenges in ensuring reliable oversight. These methods rely on direct human assessment and become untenable when AI outputs exceed human cognitive thresholds. In response to this challenge, we explore two hypotheses: (1) critique of critique can be easi&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04675v1-abstract-full').style.display = 'inline'; document.getElementById('2502.04675v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.04675v1-abstract-full" style="display: none;"> As AI capabilities increasingly surpass human proficiency in complex tasks, current alignment techniques including SFT and RLHF face fundamental challenges in ensuring reliable oversight. These methods rely on direct human assessment and become untenable when AI outputs exceed human cognitive thresholds. In response to this challenge, we explore two hypotheses: (1) critique of critique can be easier than critique itself, extending the widely-accepted observation that verification is easier than generation to the critique domain, as critique itself is a specialized form of generation; (2) this difficulty relationship is recursively held, suggesting that when direct evaluation is infeasible, performing high-order critiques (e.g., critique of critique of critique) offers a more tractable supervision pathway. To examine these hypotheses, we perform Human-Human, Human-AI, and AI-AI experiments across multiple tasks. Our results demonstrate encouraging evidence supporting these hypotheses and suggest that recursive self-critiquing is a promising direction for scalable oversight. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04675v1-abstract-full').style.display = 'none'; document.getElementById('2502.04675v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.04393">arXiv:2502.04393</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.04393">pdf</a>, <a href="https://arxiv.org/format/2502.04393">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> UniCP: A Unified Caching and Pruning Framework for Efficient Video Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Sun%2C+W">Wenzhang Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Hou%2C+Q">Qirui Hou</a>, <a href="/search/cs?searchtype=author&amp;query=Di%2C+D">Donglin Di</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jiahui Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+Y">Yongjia Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+J">Jianxun Cui</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.04393v1-abstract-short" style="display: inline;"> Diffusion Transformers (DiT) excel in video generation but encounter significant computational challenges due to the quadratic complexity of attention. Notably, attention differences between adjacent diffusion steps follow a U-shaped pattern. Current methods leverage this property by caching attention blocks, however, they still struggle with sudden error spikes and large discrepancies. To address&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04393v1-abstract-full').style.display = 'inline'; document.getElementById('2502.04393v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.04393v1-abstract-full" style="display: none;"> Diffusion Transformers (DiT) excel in video generation but encounter significant computational challenges due to the quadratic complexity of attention. Notably, attention differences between adjacent diffusion steps follow a U-shaped pattern. Current methods leverage this property by caching attention blocks, however, they still struggle with sudden error spikes and large discrepancies. To address these issues, we propose UniCP a unified caching and pruning framework for efficient video generation. UniCP optimizes both temporal and spatial dimensions through. Error Aware Dynamic Cache Window (EDCW): Dynamically adjusts cache window sizes for different blocks at various timesteps, adapting to abrupt error changes. PCA based Slicing (PCAS) and Dynamic Weight Shift (DWS): PCAS prunes redundant attention components, and DWS integrates caching and pruning by enabling dynamic switching between pruned and cached outputs. By adjusting cache windows and pruning redundant components, UniCP enhances computational efficiency and maintains video detail fidelity. Experimental results show that UniCP outperforms existing methods in both performance and efficiency. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04393v1-abstract-full').style.display = 'none'; document.getElementById('2502.04393v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.04371">arXiv:2502.04371</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.04371">pdf</a>, <a href="https://arxiv.org/format/2502.04371">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> PerPO: Perceptual Preference Optimization via Discriminative Rewarding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+Z">Zining Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+L">Liang Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Lin%2C+K">Kangheng Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jinze Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Yu%2C+E">En Yu</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+C">Chenglong Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Wei%2C+H">Haoran Wei</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+J">Jianjian Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Ge%2C+Z">Zheng Ge</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+X">Xiangyu Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.04371v1-abstract-short" style="display: inline;"> This paper presents Perceptual Preference Optimization (PerPO), a perception alignment method aimed at addressing the visual discrimination challenges in generative pre-trained multimodal large language models (MLLMs). To align MLLMs with human visual perception process, PerPO employs discriminative rewarding to gather diverse negative samples, followed by listwise preference optimization to rank&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04371v1-abstract-full').style.display = 'inline'; document.getElementById('2502.04371v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.04371v1-abstract-full" style="display: none;"> This paper presents Perceptual Preference Optimization (PerPO), a perception alignment method aimed at addressing the visual discrimination challenges in generative pre-trained multimodal large language models (MLLMs). To align MLLMs with human visual perception process, PerPO employs discriminative rewarding to gather diverse negative samples, followed by listwise preference optimization to rank them.By utilizing the reward as a quantitative margin for ranking, our method effectively bridges generative preference optimization and discriminative empirical risk minimization. PerPO significantly enhances MLLMs&#39; visual discrimination capabilities while maintaining their generative strengths, mitigates image-unconditional reward hacking, and ensures consistent performance across visual tasks. This work marks a crucial step towards more perceptually aligned and versatile MLLMs. We also hope that PerPO will encourage the community to rethink MLLM alignment strategies. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04371v1-abstract-full').style.display = 'none'; document.getElementById('2502.04371v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.04116">arXiv:2502.04116</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.04116">pdf</a>, <a href="https://arxiv.org/format/2502.04116">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Generative Adversarial Networks Bridging Art and Machine Intelligence </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Song%2C+J">Junhao Song</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Y">Yichao Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Bi%2C+Z">Ziqian Bi</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+T">Tianyang Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+K">Keyu Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+M">Ming Li</a>, <a href="/search/cs?searchtype=author&amp;query=Niu%2C+Q">Qian Niu</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+J">Junyu Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Peng%2C+B">Benji Peng</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+S">Sen Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+M">Ming Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+J">Jiawei Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Pan%2C+X">Xuanhe Pan</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+J">Jinlang Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Feng%2C+P">Pohsun Feng</a>, <a href="/search/cs?searchtype=author&amp;query=Wen%2C+Y">Yizhu Wen</a>, <a href="/search/cs?searchtype=author&amp;query=Yan%2C+L+K+Q">Lawrence K. Q. Yan</a>, <a href="/search/cs?searchtype=author&amp;query=Tseng%2C+H">Hong-Ming Tseng</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+X">Xinyuan Song</a>, <a href="/search/cs?searchtype=author&amp;query=Ren%2C+J">Jintao Ren</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+S">Silin Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Yunze Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Hsieh%2C+W">Weiche Hsieh</a>, <a href="/search/cs?searchtype=author&amp;query=Jing%2C+B">Bowen Jing</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Junjie Yang</a> , et al. (3 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.04116v2-abstract-short" style="display: inline;"> Generative Adversarial Networks (GAN) have greatly influenced the development of computer vision and artificial intelligence in the past decade and also connected art and machine intelligence together. This book begins with a detailed introduction to the fundamental principles and historical development of GANs, contrasting them with traditional generative models and elucidating the core adversari&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04116v2-abstract-full').style.display = 'inline'; document.getElementById('2502.04116v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.04116v2-abstract-full" style="display: none;"> Generative Adversarial Networks (GAN) have greatly influenced the development of computer vision and artificial intelligence in the past decade and also connected art and machine intelligence together. This book begins with a detailed introduction to the fundamental principles and historical development of GANs, contrasting them with traditional generative models and elucidating the core adversarial mechanisms through illustrative Python examples. The text systematically addresses the mathematical and theoretical underpinnings including probability theory, statistics, and game theory providing a solid framework for understanding the objectives, loss functions, and optimisation challenges inherent to GAN training. Subsequent chapters review classic variants such as Conditional GANs, DCGANs, InfoGAN, and LAPGAN before progressing to advanced training methodologies like Wasserstein GANs, GANs with gradient penalty, least squares GANs, and spectral normalisation techniques. The book further examines architectural enhancements and task-specific adaptations in generators and discriminators, showcasing practical implementations in high resolution image generation, artistic style transfer, video synthesis, text to image generation and other multimedia applications. The concluding sections offer insights into emerging research trends, including self-attention mechanisms, transformer-based generative models, and a comparative analysis with diffusion models, thus charting promising directions for future developments in both academic and applied settings. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04116v2-abstract-full').style.display = 'none'; document.getElementById('2502.04116v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.04075">arXiv:2502.04075</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.04075">pdf</a>, <a href="https://arxiv.org/format/2502.04075">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Controllable Emotion Generation with Emotion Vectors </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Dong%2C+Y">Yurui Dong</a>, <a href="/search/cs?searchtype=author&amp;query=Jin%2C+L">Luozhijie Jin</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+Y">Yao Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Lu%2C+B">Bingjie Lu</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jiaxi Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Z">Zhi Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.04075v1-abstract-short" style="display: inline;"> In recent years, technologies based on large-scale language models (LLMs) have made remarkable progress in many fields, especially in customer service, content creation, and embodied intelligence, showing broad application potential. However, The LLM&#39;s ability to express emotions with proper tone, timing, and in both direct and indirect forms is still insufficient but significant. Few works have s&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04075v1-abstract-full').style.display = 'inline'; document.getElementById('2502.04075v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.04075v1-abstract-full" style="display: none;"> In recent years, technologies based on large-scale language models (LLMs) have made remarkable progress in many fields, especially in customer service, content creation, and embodied intelligence, showing broad application potential. However, The LLM&#39;s ability to express emotions with proper tone, timing, and in both direct and indirect forms is still insufficient but significant. Few works have studied on how to build the controlable emotional expression capability of LLMs. In this work, we propose a method for emotion expression output by LLMs, which is universal, highly flexible, and well controllable proved with the extensive experiments and verifications. This method has broad application prospects in fields involving emotions output by LLMs, such as intelligent customer service, literary creation, and home companion robots. The extensive experiments on various LLMs with different model-scales and architectures prove the versatility and the effectiveness of the proposed method. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04075v1-abstract-full').style.display = 'none'; document.getElementById('2502.04075v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">15 pages, 5 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.03971">arXiv:2502.03971</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.03971">pdf</a>, <a href="https://arxiv.org/format/2502.03971">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> RWKV-UI: UI Understanding with Enhanced Perception and Reasoning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jiaxi Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Hou%2C+H">Haowen Hou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.03971v1-abstract-short" style="display: inline;"> Existing Visual Language Modelsoften struggle with information loss and limited reasoning abilities when handling high-resolution web interfaces that combine complex visual, textual, and interactive elements. These challenges are particularly evident in tasks requiring webpage layout comprehension and multi-step interactive reasoning. To address these challenges, we propose RWKV-UI, a Visual Langu&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03971v1-abstract-full').style.display = 'inline'; document.getElementById('2502.03971v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.03971v1-abstract-full" style="display: none;"> Existing Visual Language Modelsoften struggle with information loss and limited reasoning abilities when handling high-resolution web interfaces that combine complex visual, textual, and interactive elements. These challenges are particularly evident in tasks requiring webpage layout comprehension and multi-step interactive reasoning. To address these challenges, we propose RWKV-UI, a Visual Language Model based on the RWKV architecture, specifically designed to handle high-resolution UI images. During model training, we introduce layout detection as a visual prompt to help the model better understand the webpage layout structures. Additionally, we design a visual prompt based on the Chain-of-Thought(CoT) mechanism, which enhances the model&#39;s ability to understand and reason about webpage content through reasoning chains. Experimental results show that RWKV-UI demonstrates significant performance improvements in high-resolution UI understanding and interactive reasoning tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03971v1-abstract-full').style.display = 'none'; document.getElementById('2502.03971v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 5figures, conference</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.03732">arXiv:2502.03732</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.03732">pdf</a>, <a href="https://arxiv.org/format/2502.03732">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> More Modality, More AI: Exploring Design Opportunities of AI-Based Multi-modal Remote Monitoring Technologies for Early Detection of Mental Health Sequelae in Youth Concussion Patients </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yao%2C+B">Bingsheng Yao</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+M">Menglin Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+Y">Yuling Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Cao%2C+W">Weidan Cao</a>, <a href="/search/cs?searchtype=author&amp;query=Yin%2C+C">Changchang Yin</a>, <a href="/search/cs?searchtype=author&amp;query=Intille%2C+S">Stephen Intille</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+X">Xuhai Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+P">Ping Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jingzhen Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+D">Dakuo Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.03732v1-abstract-short" style="display: inline;"> Anxiety, depression, and suicidality are common mental health sequelae following concussion in youth patients, often exacerbating concussion symptoms and prolonging recovery. Despite the critical need for early detection of these mental health symptoms, clinicians often face challenges in accurately collecting patients&#39; mental health data and making clinical decision-making in a timely manner. Tod&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03732v1-abstract-full').style.display = 'inline'; document.getElementById('2502.03732v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.03732v1-abstract-full" style="display: none;"> Anxiety, depression, and suicidality are common mental health sequelae following concussion in youth patients, often exacerbating concussion symptoms and prolonging recovery. Despite the critical need for early detection of these mental health symptoms, clinicians often face challenges in accurately collecting patients&#39; mental health data and making clinical decision-making in a timely manner. Today&#39;s remote patient monitoring (RPM) technologies offer opportunities to objectively monitor patients&#39; activities, but they were not specifically designed for youth concussion patients; moreover, the large amount of data collected by RPM technologies may also impose significant workloads on clinicians to keep up with and use the data. To address these gaps, we employed a three-stage study consisting of a formative study, interface design, and design evaluation. We first conducted a formative study through semi-structured interviews with six highly professional concussion clinicians and identified clinicians&#39; key challenges in remotely collecting patient information and accessing patient treatment compliance. Subsequently, we proposed preliminary clinician-facing interface designs with the integration of AI-based RPM technologies (AI-RPM), followed by design evaluation sessions with highly professional concussion clinicians. Clinicians underscored the value of integrating multi-modal AI-RPM technologies to support clinicians&#39; decision-making while emphasizing the importance of customizable interfaces with explainability and multiple responsible design considerations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03732v1-abstract-full').style.display = 'none'; document.getElementById('2502.03732v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.03549">arXiv:2502.03549</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.03549">pdf</a>, <a href="https://arxiv.org/format/2502.03549">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Kronecker Mask and Interpretive Prompts are Language-Action Video Learners </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jingyi Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Yu%2C+Z">Zitong Yu</a>, <a href="/search/cs?searchtype=author&amp;query=Ni%2C+X">Xiuming Ni</a>, <a href="/search/cs?searchtype=author&amp;query=He%2C+J">Jia He</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Hui Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.03549v3-abstract-short" style="display: inline;"> Contrastive language-image pretraining (CLIP) has significantly advanced image-based vision learning. A pressing topic subsequently arises: how can we effectively adapt CLIP to the video domain? Recent studies have focused on adjusting either the textual or visual branch of CLIP for action recognition. However, we argue that adaptations of both branches are crucial. In this paper, we propose \text&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03549v3-abstract-full').style.display = 'inline'; document.getElementById('2502.03549v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.03549v3-abstract-full" style="display: none;"> Contrastive language-image pretraining (CLIP) has significantly advanced image-based vision learning. A pressing topic subsequently arises: how can we effectively adapt CLIP to the video domain? Recent studies have focused on adjusting either the textual or visual branch of CLIP for action recognition. However, we argue that adaptations of both branches are crucial. In this paper, we propose \textbf{CLAVER}: a \textbf{C}ontrastive \textbf{L}anguage-\textbf{A}ction \textbf{V}ideo Learn\textbf{er}, designed to shift CLIP&#39;s focus from the alignment of static visual objects and concrete nouns to the alignment of dynamic action behaviors and abstract verbs. Specifically, we introduce a novel Kronecker mask attention for temporal modeling. Our tailored Kronecker mask offers three benefits 1) it expands the temporal receptive field for each token, 2) it serves as an effective spatiotemporal heterogeneity inductive bias, mitigating the issue of spatiotemporal homogenization, and 3) it can be seamlessly plugged into transformer-based models. Regarding the textual branch, we leverage large language models to generate diverse, sentence-level and semantically rich interpretive prompts of actions, which shift the model&#39;s focus towards the verb comprehension. Extensive experiments on various benchmarks and learning scenarios demonstrate the superiority and generality of our approach. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03549v3-abstract-full').style.display = 'none'; document.getElementById('2502.03549v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 5 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to ICLR 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.03493">arXiv:2502.03493</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.03493">pdf</a>, <a href="https://arxiv.org/format/2502.03493">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> MetaFE-DE: Learning Meta Feature Embedding for Depth Estimation from Monocular Endoscopic Images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lu%2C+D">Dawei Lu</a>, <a href="/search/cs?searchtype=author&amp;query=Xiao%2C+D">Deqiang Xiao</a>, <a href="/search/cs?searchtype=author&amp;query=Ai%2C+D">Danni Ai</a>, <a href="/search/cs?searchtype=author&amp;query=Fan%2C+J">Jingfan Fan</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+T">Tianyu Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Lin%2C+Y">Yucong Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+H">Hong Song</a>, <a href="/search/cs?searchtype=author&amp;query=Ye%2C+X">Xujiong Ye</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+L">Lei Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jian Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.03493v1-abstract-short" style="display: inline;"> Depth estimation from monocular endoscopic images presents significant challenges due to the complexity of endoscopic surgery, such as irregular shapes of human soft tissues, as well as variations in lighting conditions. Existing methods primarily estimate the depth information from RGB images directly, and often surffer the limited interpretability and accuracy. Given that RGB and depth images ar&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03493v1-abstract-full').style.display = 'inline'; document.getElementById('2502.03493v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.03493v1-abstract-full" style="display: none;"> Depth estimation from monocular endoscopic images presents significant challenges due to the complexity of endoscopic surgery, such as irregular shapes of human soft tissues, as well as variations in lighting conditions. Existing methods primarily estimate the depth information from RGB images directly, and often surffer the limited interpretability and accuracy. Given that RGB and depth images are two views of the same endoscopic surgery scene, in this paper, we introduce a novel concept referred as ``meta feature embedding (MetaFE)&#34;, in which the physical entities (e.g., tissues and surgical instruments) of endoscopic surgery are represented using the shared features that can be alternatively decoded into RGB or depth image. With this concept, we propose a two-stage self-supervised learning paradigm for the monocular endoscopic depth estimation. In the first stage, we propose a temporal representation learner using diffusion models, which are aligned with the spatial information through the cross normalization to construct the MetaFE. In the second stage, self-supervised monocular depth estimation with the brightness calibration is applied to decode the meta features into the depth image. Extensive evaluation on diverse endoscopic datasets demonstrates that our approach outperforms the state-of-the-art method in depth estimation, achieving superior accuracy and generalization. The source code will be publicly available. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03493v1-abstract-full').style.display = 'none'; document.getElementById('2502.03493v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.03482">arXiv:2502.03482</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.03482">pdf</a>, <a href="https://arxiv.org/format/2502.03482">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Can Domain Experts Rely on AI Appropriately? A Case Study on AI-Assisted Prostate Cancer MRI Diagnosis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chen%2C+C">Chacha Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+H">Han Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jiamin Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Mervak%2C+B+M">Benjamin M. Mervak</a>, <a href="/search/cs?searchtype=author&amp;query=Kalaycioglu%2C+B">Bora Kalaycioglu</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+G">Grace Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Cakmakli%2C+E">Emre Cakmakli</a>, <a href="/search/cs?searchtype=author&amp;query=Bonatti%2C+M">Matteo Bonatti</a>, <a href="/search/cs?searchtype=author&amp;query=Pudu%2C+S">Sridhar Pudu</a>, <a href="/search/cs?searchtype=author&amp;query=Kahraman%2C+O">Osman Kahraman</a>, <a href="/search/cs?searchtype=author&amp;query=Pamuk%2C+G+G">Gul Gizem Pamuk</a>, <a href="/search/cs?searchtype=author&amp;query=Oto%2C+A">Aytekin Oto</a>, <a href="/search/cs?searchtype=author&amp;query=Chatterjee%2C+A">Aritrick Chatterjee</a>, <a href="/search/cs?searchtype=author&amp;query=Tan%2C+C">Chenhao Tan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.03482v1-abstract-short" style="display: inline;"> Despite the growing interest in human-AI decision making, experimental studies with domain experts remain rare, largely due to the complexity of working with domain experts and the challenges in setting up realistic experiments. In this work, we conduct an in-depth collaboration with radiologists in prostate cancer diagnosis based on MRI images. Building on existing tools for teaching prostate can&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03482v1-abstract-full').style.display = 'inline'; document.getElementById('2502.03482v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.03482v1-abstract-full" style="display: none;"> Despite the growing interest in human-AI decision making, experimental studies with domain experts remain rare, largely due to the complexity of working with domain experts and the challenges in setting up realistic experiments. In this work, we conduct an in-depth collaboration with radiologists in prostate cancer diagnosis based on MRI images. Building on existing tools for teaching prostate cancer diagnosis, we develop an interface and conduct two experiments to study how AI assistance and performance feedback shape the decision making of domain experts. In Study 1, clinicians were asked to provide an initial diagnosis (human), then view the AI&#39;s prediction, and subsequently finalize their decision (human-AI team). In Study 2 (after a memory wash-out period), the same participants first received aggregated performance statistics from Study 1, specifically their own performance, the AI&#39;s performance, and their human-AI team performance, and then directly viewed the AI&#39;s prediction before making their diagnosis (i.e., no independent initial diagnosis). These two workflows represent realistic ways that clinical AI tools might be used in practice, where the second study simulates a scenario where doctors can adjust their reliance and trust on AI based on prior performance feedback. Our findings show that, while human-AI teams consistently outperform humans alone, they still underperform the AI due to under-reliance, similar to prior studies with crowdworkers. Providing clinicians with performance feedback did not significantly improve the performance of human-AI teams, although showing AI decisions in advance nudges people to follow AI more. Meanwhile, we observe that the ensemble of human-AI teams can outperform AI alone, suggesting promising directions for human-AI collaboration. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03482v1-abstract-full').style.display = 'none'; document.getElementById('2502.03482v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.03438">arXiv:2502.03438</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.03438">pdf</a>, <a href="https://arxiv.org/format/2502.03438">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> BFS-Prover: Scalable Best-First Tree Search for LLM-based Automatic Theorem Proving </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Xin%2C+R">Ran Xin</a>, <a href="/search/cs?searchtype=author&amp;query=Xi%2C+C">Chenguang Xi</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jie Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+F">Feng Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+H">Hang Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Xiao%2C+X">Xia Xiao</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+Y">Yifan Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+S">Shen Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Shen%2C+K">Kai Shen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.03438v1-abstract-short" style="display: inline;"> Recent advancements in large language models (LLMs) have spurred growing interest in automatic theorem proving using Lean4, where effective tree search methods are crucial for navigating proof search spaces. While the existing approaches primarily rely on value functions and Monte Carlo Tree Search (MCTS), the potential of simpler methods like Best-First Search (BFS) remains underexplored. This pa&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03438v1-abstract-full').style.display = 'inline'; document.getElementById('2502.03438v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.03438v1-abstract-full" style="display: none;"> Recent advancements in large language models (LLMs) have spurred growing interest in automatic theorem proving using Lean4, where effective tree search methods are crucial for navigating proof search spaces. While the existing approaches primarily rely on value functions and Monte Carlo Tree Search (MCTS), the potential of simpler methods like Best-First Search (BFS) remains underexplored. This paper investigates whether BFS can achieve competitive performance in large-scale theorem proving tasks. We present \texttt{BFS-Prover}, a scalable expert iteration framework, featuring three key innovations. First, we implement strategic data filtering at each expert iteration round, excluding problems solvable via beam search node expansion to focus on harder cases. Second, we improve the sample efficiency of BFS through Direct Preference Optimization (DPO) applied to state-tactic pairs automatically annotated with compiler error feedback, refining the LLM&#39;s policy to prioritize productive expansions. Third, we employ length normalization in BFS to encourage exploration of deeper proof paths. \texttt{BFS-Prover} achieves a score of $71.31$ on the MiniF2F test set and therefore challenges the perceived necessity of complex tree search methods, demonstrating that BFS can achieve competitive performance when properly scaled. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03438v1-abstract-full').style.display = 'none'; document.getElementById('2502.03438v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.02590">arXiv:2502.02590</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.02590">pdf</a>, <a href="https://arxiv.org/format/2502.02590">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Articulate AnyMesh: Open-Vocabulary 3D Articulated Objects Modeling </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Qiu%2C+X">Xiaowen Qiu</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jincheng Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Yian Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Z">Zhehuan Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Yufei Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+T">Tsun-Hsuan Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Xian%2C+Z">Zhou Xian</a>, <a href="/search/cs?searchtype=author&amp;query=Gan%2C+C">Chuang Gan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.02590v1-abstract-short" style="display: inline;"> 3D articulated objects modeling has long been a challenging problem, since it requires to capture both accurate surface geometries and semantically meaningful and spatially precise structures, parts, and joints. Existing methods heavily depend on training data from a limited set of handcrafted articulated object categories (e.g., cabinets and drawers), which restricts their ability to model a wide&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02590v1-abstract-full').style.display = 'inline'; document.getElementById('2502.02590v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.02590v1-abstract-full" style="display: none;"> 3D articulated objects modeling has long been a challenging problem, since it requires to capture both accurate surface geometries and semantically meaningful and spatially precise structures, parts, and joints. Existing methods heavily depend on training data from a limited set of handcrafted articulated object categories (e.g., cabinets and drawers), which restricts their ability to model a wide range of articulated objects in an open-vocabulary context. To address these limitations, we propose Articulate Anymesh, an automated framework that is able to convert any rigid 3D mesh into its articulated counterpart in an open-vocabulary manner. Given a 3D mesh, our framework utilizes advanced Vision-Language Models and visual prompting techniques to extract semantic information, allowing for both the segmentation of object parts and the construction of functional joints. Our experiments show that Articulate Anymesh can generate large-scale, high-quality 3D articulated objects, including tools, toys, mechanical devices, and vehicles, significantly expanding the coverage of existing 3D articulated object datasets. Additionally, we show that these generated assets can facilitate the acquisition of new articulated object manipulation skills in simulation, which can then be transferred to a real robotic system. Our Github website is https://articulate-anymesh.github.io. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02590v1-abstract-full').style.display = 'none'; document.getElementById('2502.02590v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.02340">arXiv:2502.02340</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.02340">pdf</a>, <a href="https://arxiv.org/format/2502.02340">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Transfer Risk Map: Mitigating Pixel-level Negative Transfer in Medical Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Duan%2C+S">Shutong Duan</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jingyun Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Tan%2C+Y">Yang Tan</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+G">Guoqing Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Y">Yang Li</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+X">Xiao-Ping Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.02340v1-abstract-short" style="display: inline;"> How to mitigate negative transfer in transfer learning is a long-standing and challenging issue, especially in the application of medical image segmentation. Existing methods for reducing negative transfer focus on classification or regression tasks, ignoring the non-uniform negative transfer risk in different image regions. In this work, we propose a simple yet effective weighted fine-tuning meth&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02340v1-abstract-full').style.display = 'inline'; document.getElementById('2502.02340v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.02340v1-abstract-full" style="display: none;"> How to mitigate negative transfer in transfer learning is a long-standing and challenging issue, especially in the application of medical image segmentation. Existing methods for reducing negative transfer focus on classification or regression tasks, ignoring the non-uniform negative transfer risk in different image regions. In this work, we propose a simple yet effective weighted fine-tuning method that directs the model&#39;s attention towards regions with significant transfer risk for medical semantic segmentation. Specifically, we compute a transferability-guided transfer risk map to quantify the transfer hardness for each pixel and the potential risks of negative transfer. During the fine-tuning phase, we introduce a map-weighted loss function, normalized with image foreground size to counter class imbalance. Extensive experiments on brain segmentation datasets show our method significantly improves the target task performance, with gains of 4.37% on FeTS2021 and 1.81% on iSeg2019, avoiding negative transfer across modalities and tasks. Meanwhile, a 2.9% gain under a few-shot scenario validates the robustness of our approach. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02340v1-abstract-full').style.display = 'none'; document.getElementById('2502.02340v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.02287">arXiv:2502.02287</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.02287">pdf</a>, <a href="https://arxiv.org/format/2502.02287">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Adaptive Resource Allocation Optimization Using Large Language Models in Dynamic Wireless Environments </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Noh%2C+H">Hyeonho Noh</a>, <a href="/search/cs?searchtype=author&amp;query=Shim%2C+B">Byonghyo Shim</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+H+J">Hyun Jong Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.02287v1-abstract-short" style="display: inline;"> Deep learning (DL) has made notable progress in addressing complex radio access network control challenges that conventional analytic methods have struggled to solve. However, DL has shown limitations in solving constrained NP-hard problems often encountered in network optimization, such as those involving quality of service (QoS) or discrete variables like user indices. Current solutions rely on&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02287v1-abstract-full').style.display = 'inline'; document.getElementById('2502.02287v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.02287v1-abstract-full" style="display: none;"> Deep learning (DL) has made notable progress in addressing complex radio access network control challenges that conventional analytic methods have struggled to solve. However, DL has shown limitations in solving constrained NP-hard problems often encountered in network optimization, such as those involving quality of service (QoS) or discrete variables like user indices. Current solutions rely on domain-specific architectures or heuristic techniques, and a general DL approach for constrained optimization remains undeveloped. Moreover, even minor changes in communication objectives demand time-consuming retraining, limiting their adaptability to dynamic environments where task objectives, constraints, environmental factors, and communication scenarios frequently change. To address these challenges, we propose a large language model for resource allocation optimizer (LLM-RAO), a novel approach that harnesses the capabilities of LLMs to address the complex resource allocation problem while adhering to QoS constraints. By employing a prompt-based tuning strategy to flexibly convey ever-changing task descriptions and requirements to the LLM, LLM-RAO demonstrates robust performance and seamless adaptability in dynamic environments without requiring extensive retraining. Simulation results reveal that LLM-RAO achieves up to a 40% performance enhancement compared to conventional DL methods and up to an $80$\% improvement over analytical approaches. Moreover, in scenarios with fluctuating communication objectives, LLM-RAO attains up to 2.9 times the performance of traditional DL-based networks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02287v1-abstract-full').style.display = 'none'; document.getElementById('2502.02287v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.02215">arXiv:2502.02215</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.02215">pdf</a>, <a href="https://arxiv.org/format/2502.02215">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> InterLCM: Low-Quality Images as Intermediate States of Latent Consistency Models for Effective Blind Face Restoration </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+S">Senmao Li</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+K">Kai Wang</a>, <a href="/search/cs?searchtype=author&amp;query=van+de+Weijer%2C+J">Joost van de Weijer</a>, <a href="/search/cs?searchtype=author&amp;query=Khan%2C+F+S">Fahad Shahbaz Khan</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+C">Chun-Le Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+S">Shiqi Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Yaxing Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jian Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+M">Ming-Ming Cheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.02215v1-abstract-short" style="display: inline;"> Diffusion priors have been used for blind face restoration (BFR) by fine-tuning diffusion models (DMs) on restoration datasets to recover low-quality images. However, the naive application of DMs presents several key limitations. (i) The diffusion prior has inferior semantic consistency (e.g., ID, structure and color.), increasing the difficulty of optimizing the BFR model; (ii) reliance on hundre&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02215v1-abstract-full').style.display = 'inline'; document.getElementById('2502.02215v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.02215v1-abstract-full" style="display: none;"> Diffusion priors have been used for blind face restoration (BFR) by fine-tuning diffusion models (DMs) on restoration datasets to recover low-quality images. However, the naive application of DMs presents several key limitations. (i) The diffusion prior has inferior semantic consistency (e.g., ID, structure and color.), increasing the difficulty of optimizing the BFR model; (ii) reliance on hundreds of denoising iterations, preventing the effective cooperation with perceptual losses, which is crucial for faithful restoration. Observing that the latent consistency model (LCM) learns consistency noise-to-data mappings on the ODE-trajectory and therefore shows more semantic consistency in the subject identity, structural information and color preservation, we propose InterLCM to leverage the LCM for its superior semantic consistency and efficiency to counter the above issues. Treating low-quality images as the intermediate state of LCM, InterLCM achieves a balance between fidelity and quality by starting from earlier LCM steps. LCM also allows the integration of perceptual loss during training, leading to improved restoration quality, particularly in real-world scenarios. To mitigate structural and semantic uncertainties, InterLCM incorporates a Visual Module to extract visual features and a Spatial Encoder to capture spatial details, enhancing the fidelity of restored images. Extensive experiments demonstrate that InterLCM outperforms existing approaches in both synthetic and real-world datasets while also achieving faster inference speed. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02215v1-abstract-full').style.display = 'none'; document.getElementById('2502.02215v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at ICLR2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.01312">arXiv:2502.01312</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.01312">pdf</a>, <a href="https://arxiv.org/format/2502.01312">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> CleanPose: Category-Level Object Pose Estimation via Causal Learning and Knowledge Distillation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lin%2C+X">Xiao Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Peng%2C+Y">Yun Peng</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+L">Liuyi Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhong%2C+X">Xianyou Zhong</a>, <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+M">Minghao Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jingwei Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+C">Chengju Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Q">Qijun Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.01312v1-abstract-short" style="display: inline;"> Category-level object pose estimation aims to recover the rotation, translation and size of unseen instances within predefined categories. In this task, deep neural network-based methods have demonstrated remarkable performance. However, previous studies show they suffer from spurious correlations raised by &#34;unclean&#34; confounders in models, hindering their performance on novel instances with signif&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01312v1-abstract-full').style.display = 'inline'; document.getElementById('2502.01312v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.01312v1-abstract-full" style="display: none;"> Category-level object pose estimation aims to recover the rotation, translation and size of unseen instances within predefined categories. In this task, deep neural network-based methods have demonstrated remarkable performance. However, previous studies show they suffer from spurious correlations raised by &#34;unclean&#34; confounders in models, hindering their performance on novel instances with significant variations. To address this issue, we propose CleanPose, a novel approach integrating causal learning and knowledge distillation to enhance category-level pose estimation. To mitigate the negative effect of unobserved confounders, we develop a causal inference module based on front-door adjustment, which promotes unbiased estimation by reducing potential spurious correlations. Additionally, to further improve generalization ability, we devise a residual-based knowledge distillation method that has proven effective in providing comprehensive category information guidance. Extensive experiments across multiple benchmarks (REAL275, CAMERA25 and HouseCat6D) hightlight the superiority of proposed CleanPose over state-of-the-art methods. Code will be released. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01312v1-abstract-full').style.display = 'none'; document.getElementById('2502.01312v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.01061">arXiv:2502.01061</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.01061">pdf</a>, <a href="https://arxiv.org/format/2502.01061">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> OmniHuman-1: Rethinking the Scaling-Up of One-Stage Conditioned Human Animation Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lin%2C+G">Gaojie Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Jiang%2C+J">Jianwen Jiang</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jiaqi Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+Z">Zerong Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Liang%2C+C">Chao Liang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.01061v2-abstract-short" style="display: inline;"> End-to-end human animation, such as audio-driven talking human generation, has undergone notable advancements in the recent few years. However, existing methods still struggle to scale up as large general video generation models, limiting their potential in real applications. In this paper, we propose OmniHuman, a Diffusion Transformer-based framework that scales up data by mixing motion-related c&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01061v2-abstract-full').style.display = 'inline'; document.getElementById('2502.01061v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.01061v2-abstract-full" style="display: none;"> End-to-end human animation, such as audio-driven talking human generation, has undergone notable advancements in the recent few years. However, existing methods still struggle to scale up as large general video generation models, limiting their potential in real applications. In this paper, we propose OmniHuman, a Diffusion Transformer-based framework that scales up data by mixing motion-related conditions into the training phase. To this end, we introduce two training principles for these mixed conditions, along with the corresponding model architecture and inference strategy. These designs enable OmniHuman to fully leverage data-driven motion generation, ultimately achieving highly realistic human video generation. More importantly, OmniHuman supports various portrait contents (face close-up, portrait, half-body, full-body), supports both talking and singing, handles human-object interactions and challenging body poses, and accommodates different image styles. Compared to existing end-to-end audio-driven methods, OmniHuman not only produces more realistic videos, but also offers greater flexibility in inputs. It also supports multiple driving modalities (audio-driven, video-driven and combined driving signals). Video samples are provided on the ttfamily project page (https://omnihuman-lab.github.io) <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01061v2-abstract-full').style.display = 'none'; document.getElementById('2502.01061v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">https://omnihuman-lab.github.io/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.01000">arXiv:2502.01000</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.01000">pdf</a>, <a href="https://arxiv.org/format/2502.01000">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Adapting Foundation Models for Few-Shot Medical Image Segmentation: Actively and Sequentially </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jingyun Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+G">Guoqing Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+J">Jingge Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Y">Yang Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.01000v1-abstract-short" style="display: inline;"> Recent advances in foundation models have brought promising results in computer vision, including medical image segmentation. Fine-tuning foundation models on specific low-resource medical tasks has become a standard practice. However, ensuring reliable and robust model adaptation when the target task has a large domain gap and few annotated samples remains a challenge. Previous few-shot domain ad&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01000v1-abstract-full').style.display = 'inline'; document.getElementById('2502.01000v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.01000v1-abstract-full" style="display: none;"> Recent advances in foundation models have brought promising results in computer vision, including medical image segmentation. Fine-tuning foundation models on specific low-resource medical tasks has become a standard practice. However, ensuring reliable and robust model adaptation when the target task has a large domain gap and few annotated samples remains a challenge. Previous few-shot domain adaptation (FSDA) methods seek to bridge the distribution gap between source and target domains by utilizing auxiliary data. The selection and scheduling of auxiliaries are often based on heuristics, which can easily cause negative transfer. In this work, we propose an Active and Sequential domain AdaPtation (ASAP) framework for dynamic auxiliary dataset selection in FSDA. We formulate FSDA as a multi-armed bandit problem and derive an efficient reward function to prioritize training on auxiliary datasets that align closely with the target task, through a single-round fine-tuning. Empirical validation on diverse medical segmentation datasets demonstrates that our method achieves favorable segmentation performance, significantly outperforming the state-of-the-art FSDA methods, achieving an average gain of 27.75% on MRI and 7.52% on CT datasets in Dice score. Code is available at the git repository: https://github.com/techicoco/ASAP. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01000v1-abstract-full').style.display = 'none'; document.getElementById('2502.01000v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00874">arXiv:2502.00874</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.00874">pdf</a>, <a href="https://arxiv.org/format/2502.00874">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Digital Libraries">cs.DL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> Paper Copilot: The Artificial Intelligence and Machine Learning Community Should Adopt a More Transparent and Regulated Peer Review Process </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jing Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.00874v1-abstract-short" style="display: inline;"> The rapid growth of submissions to top-tier Artificial Intelligence (AI) and Machine Learning (ML) conferences has prompted many venues to transition from closed to open review platforms. Some have fully embraced open peer reviews, allowing public visibility throughout the process, while others adopt hybrid approaches, such as releasing reviews only after final decisions or keeping reviews private&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00874v1-abstract-full').style.display = 'inline'; document.getElementById('2502.00874v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.00874v1-abstract-full" style="display: none;"> The rapid growth of submissions to top-tier Artificial Intelligence (AI) and Machine Learning (ML) conferences has prompted many venues to transition from closed to open review platforms. Some have fully embraced open peer reviews, allowing public visibility throughout the process, while others adopt hybrid approaches, such as releasing reviews only after final decisions or keeping reviews private despite using open peer review systems. In this work, we analyze the strengths and limitations of these models, highlighting the growing community interest in transparent peer review. To support this discussion, we examine insights from Paper Copilot, a website launched two years ago to aggregate and analyze AI / ML conference data while engaging a global audience. The site has attracted over 200,000 early-career researchers, particularly those aged 18-34 from 177 countries, many of whom are actively engaged in the peer review process. Drawing on our findings, this position paper advocates for a more transparent, open, and well-regulated peer review aiming to foster greater community involvement and propel advancements in the field. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00874v1-abstract-full').style.display = 'none'; document.getElementById('2502.00874v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00717">arXiv:2502.00717</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.00717">pdf</a>, <a href="https://arxiv.org/format/2502.00717">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> MINT: Mitigating Hallucinations in Large Vision-Language Models via Token Reduction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wang%2C+C">Chao Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jianming Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+Y">Yang Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.00717v1-abstract-short" style="display: inline;"> Hallucination has been a long-standing and inevitable problem that hinders the application of Large Vision-Language Models (LVLMs) in domains that require high reliability. Various methods focus on improvement depending on data annotations or training strategies, yet place less emphasis on LLM&#39;s inherent problems. To fill this gap, we delve into the attention mechanism of the decoding process in t&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00717v1-abstract-full').style.display = 'inline'; document.getElementById('2502.00717v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.00717v1-abstract-full" style="display: none;"> Hallucination has been a long-standing and inevitable problem that hinders the application of Large Vision-Language Models (LVLMs) in domains that require high reliability. Various methods focus on improvement depending on data annotations or training strategies, yet place less emphasis on LLM&#39;s inherent problems. To fill this gap, we delve into the attention mechanism of the decoding process in the LVLM. Intriguingly, our investigation uncovers the prevalent attention redundancy within the hierarchical architecture of the LVLM, manifesting as overextended image processing in deep layers and an overabundance of non-essential image tokens. Stemming from the observation, we thus propose MINT, a novel training-free decoding strategy, MItigating hallucinations via tokeN reducTion. Specifically, we dynamically intensify the LVLM&#39;s local perception capability by masking its attention to irrelevant image tokens. In addition, we use contrastive decoding that pushes the model to focus more on those key image regions. Our full method aims to guide the model in concentrating more on key visual elements during generation. Extensive experimental results on several popular public benchmarks show that our approach achieves a 4% improvement in mitigating hallucinations caused by distracted perception compared to original models. Meanwhile, our approach is demonstrated to make the model perceive 5% more visual points even though we reduce a suite of image tokens. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00717v1-abstract-full').style.display = 'none'; document.getElementById('2502.00717v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 5 figures, 4 tables</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.2.10 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00317">arXiv:2502.00317</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.00317">pdf</a>, <a href="https://arxiv.org/format/2502.00317">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Databases">cs.DB</span> </div> </div> <p class="title is-5 mathjax"> DIST: Efficient k-Clique Listing via Induced Subgraph Trie </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Nam%2C+Y">Yehyun Nam</a>, <a href="/search/cs?searchtype=author&amp;query=Jang%2C+J">Jihoon Jang</a>, <a href="/search/cs?searchtype=author&amp;query=Park%2C+K">Kunsoo Park</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jianye Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Long%2C+C">Cheng Long</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.00317v1-abstract-short" style="display: inline;"> Listing k-cliques plays a fundamental role in various data mining tasks, such as community detection and mining of cohesive substructures. Existing algorithms for the k-clique listing problem are built upon a general framework, which finds k-cliques by recursively finding (k-1)-cliques within subgraphs induced by the out-neighbors of each vertex. However, this framework has inherent inefficiency o&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00317v1-abstract-full').style.display = 'inline'; document.getElementById('2502.00317v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.00317v1-abstract-full" style="display: none;"> Listing k-cliques plays a fundamental role in various data mining tasks, such as community detection and mining of cohesive substructures. Existing algorithms for the k-clique listing problem are built upon a general framework, which finds k-cliques by recursively finding (k-1)-cliques within subgraphs induced by the out-neighbors of each vertex. However, this framework has inherent inefficiency of finding smaller cliques within certain subgraphs repeatedly. In this paper, we propose an algorithm DIST for the k-clique listing problem. In contrast to existing works, the main idea in our approach is to compute each clique in the given graph only once and store it into a data structure called Induced Subgraph Trie, which allows us to retrieve the cliques efficiently. Furthermore, we propose a method to prune search space based on a novel concept called soft embedding of an l-tree, which further improves the running time. We show the superiority of our approach in terms of time and space usage through comprehensive experiments conducted on real-world networks; DIST outperforms the state-of-the-art algorithm by up to two orders of magnitude in both single-threaded and parallel experiments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00317v1-abstract-full').style.display = 'none'; document.getElementById('2502.00317v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00253">arXiv:2502.00253</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.00253">pdf</a>, <a href="https://arxiv.org/format/2502.00253">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Patch Triplet Similarity Purification for Guided Real-World Low-Dose CT Image Denoising </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Long%2C+J">Junhao Long</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+F">Fengwei Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Yan%2C+J">Juncheng Yan</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+B">Baoping Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Jin%2C+C">Chao Jin</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jian Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Zou%2C+C">Changliang Zou</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+J">Jun Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.00253v1-abstract-short" style="display: inline;"> Image denoising of low-dose computed tomography (LDCT) is an important problem for clinical diagnosis with reduced radiation exposure. Previous methods are mostly trained with pairs of synthetic or misaligned LDCT and normal-dose CT (NDCT) images. However, trained with synthetic noise or misaligned LDCT/NDCT image pairs, the denoising networks would suffer from blurry structure or motion artifacts&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00253v1-abstract-full').style.display = 'inline'; document.getElementById('2502.00253v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.00253v1-abstract-full" style="display: none;"> Image denoising of low-dose computed tomography (LDCT) is an important problem for clinical diagnosis with reduced radiation exposure. Previous methods are mostly trained with pairs of synthetic or misaligned LDCT and normal-dose CT (NDCT) images. However, trained with synthetic noise or misaligned LDCT/NDCT image pairs, the denoising networks would suffer from blurry structure or motion artifacts. Since non-contrast CT (NCCT) images share the content characteristics to the corresponding NDCT images in a three-phase scan, they can potentially provide useful information for real-world LDCT image denoising. To exploit this aspect, in this paper, we propose to incorporate clean NCCT images as useful guidance for the learning of real-world LDCT image denoising networks. To alleviate the issue of spatial misalignment in training data, we design a new Patch Triplet Similarity Purification (PTSP) strategy to select highly similar patch (instead of image) triplets of LDCT, NDCT, and NCCT images for network training. Furthermore, we modify two image denoising transformers of SwinIR and HAT to accommodate the NCCT image guidance, by replacing vanilla self-attention with cross-attention. On our collected clinical dataset, the modified transformers trained with the data selected by our PTSP strategy show better performance than 15 comparison methods on real-world LDCT image denoising. Ablation studies validate the effectiveness of our NCCT image guidance and PTSP strategy. We will publicly release our data and code. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00253v1-abstract-full').style.display = 'none'; document.getElementById('2502.00253v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.19374">arXiv:2501.19374</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.19374">pdf</a>, <a href="https://arxiv.org/format/2501.19374">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Atmospheric and Oceanic Physics">physics.ao-ph</span> </div> </div> <p class="title is-5 mathjax"> Fixing the Double Penalty in Data-Driven Weather Forecasting Through a Modified Spherical Harmonic Loss Function </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Subich%2C+C">Christopher Subich</a>, <a href="/search/cs?searchtype=author&amp;query=Husain%2C+S+Z">Syed Zahid Husain</a>, <a href="/search/cs?searchtype=author&amp;query=Separovic%2C+L">Leo Separovic</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jing Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.19374v1-abstract-short" style="display: inline;"> Recent advancements in data-driven weather forecasting models have delivered deterministic models that outperform the leading operational forecast systems based on traditional, physics-based models. However, these data-driven models are typically trained with a mean squared error loss function, which causes smoothing of fine scales through a &#34;double penalty&#34; effect. We develop a simple, parameter-&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.19374v1-abstract-full').style.display = 'inline'; document.getElementById('2501.19374v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.19374v1-abstract-full" style="display: none;"> Recent advancements in data-driven weather forecasting models have delivered deterministic models that outperform the leading operational forecast systems based on traditional, physics-based models. However, these data-driven models are typically trained with a mean squared error loss function, which causes smoothing of fine scales through a &#34;double penalty&#34; effect. We develop a simple, parameter-free modification to this loss function that avoids this problem by separating the loss attributable to decorrelation from the loss attributable to spectral amplitude errors. Fine-tuning the GraphCast model with this new loss function results in sharp deterministic weather forecasts, an increase of the model&#39;s effective resolution from 1,250km to 160km, improvements to ensemble spread, and improvements to predictions of tropical cyclone strength and surface wind extremes. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.19374v1-abstract-full').style.display = 'none'; document.getElementById('2501.19374v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.2.6; I.2.1; J.2 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.18636">arXiv:2501.18636</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.18636">pdf</a>, <a href="https://arxiv.org/format/2501.18636">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> SafeRAG: Benchmarking Security in Retrieval-Augmented Generation of Large Language Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Liang%2C+X">Xun Liang</a>, <a href="/search/cs?searchtype=author&amp;query=Niu%2C+S">Simin Niu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Z">Zhiyu Li</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+S">Sensen Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+H">Hanyu Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Xiong%2C+F">Feiyu Xiong</a>, <a href="/search/cs?searchtype=author&amp;query=Fan%2C+J+Z">Jason Zhaoxin Fan</a>, <a href="/search/cs?searchtype=author&amp;query=Tang%2C+B">Bo Tang</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+S">Shichao Song</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+M">Mengwei Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jiawei Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.18636v1-abstract-short" style="display: inline;"> The indexing-retrieval-generation paradigm of retrieval-augmented generation (RAG) has been highly successful in solving knowledge-intensive tasks by integrating external knowledge into large language models (LLMs). However, the incorporation of external and unverified knowledge increases the vulnerability of LLMs because attackers can perform attack tasks by manipulating knowledge. In this paper,&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18636v1-abstract-full').style.display = 'inline'; document.getElementById('2501.18636v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.18636v1-abstract-full" style="display: none;"> The indexing-retrieval-generation paradigm of retrieval-augmented generation (RAG) has been highly successful in solving knowledge-intensive tasks by integrating external knowledge into large language models (LLMs). However, the incorporation of external and unverified knowledge increases the vulnerability of LLMs because attackers can perform attack tasks by manipulating knowledge. In this paper, we introduce a benchmark named SafeRAG designed to evaluate the RAG security. First, we classify attack tasks into silver noise, inter-context conflict, soft ad, and white Denial-of-Service. Next, we construct RAG security evaluation dataset (i.e., SafeRAG dataset) primarily manually for each task. We then utilize the SafeRAG dataset to simulate various attack scenarios that RAG may encounter. Experiments conducted on 14 representative RAG components demonstrate that RAG exhibits significant vulnerability to all attack tasks and even the most apparent attack task can easily bypass existing retrievers, filters, or advanced LLMs, resulting in the degradation of RAG service quality. Code is available at: https://github.com/IAAR-Shanghai/SafeRAG. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18636v1-abstract-full').style.display = 'none'; document.getElementById('2501.18636v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.18351">arXiv:2501.18351</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.18351">pdf</a>, <a href="https://arxiv.org/format/2501.18351">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Dual-BEV Nav: Dual-layer BEV-based Heuristic Path Planning for Robotic Navigation in Unstructured Outdoor Environments </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+J">Jianfeng Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Dong%2C+H">Hanlin Dong</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jian Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+J">Jiahui Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+S">Shibo Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+K">Ke Li</a>, <a href="/search/cs?searchtype=author&amp;query=Tang%2C+X">Xuan Tang</a>, <a href="/search/cs?searchtype=author&amp;query=Wei%2C+X">Xian Wei</a>, <a href="/search/cs?searchtype=author&amp;query=You%2C+X">Xiong You</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.18351v1-abstract-short" style="display: inline;"> Path planning with strong environmental adaptability plays a crucial role in robotic navigation in unstructured outdoor environments, especially in the case of low-quality location and map information. The path planning ability of a robot depends on the identification of the traversability of global and local ground areas. In real-world scenarios, the complexity of outdoor open environments makes&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18351v1-abstract-full').style.display = 'inline'; document.getElementById('2501.18351v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.18351v1-abstract-full" style="display: none;"> Path planning with strong environmental adaptability plays a crucial role in robotic navigation in unstructured outdoor environments, especially in the case of low-quality location and map information. The path planning ability of a robot depends on the identification of the traversability of global and local ground areas. In real-world scenarios, the complexity of outdoor open environments makes it difficult for robots to identify the traversability of ground areas that lack a clearly defined structure. Moreover, most existing methods have rarely analyzed the integration of local and global traversability identifications in unstructured outdoor scenarios. To address this problem, we propose a novel method, Dual-BEV Nav, first introducing Bird&#39;s Eye View (BEV) representations into local planning to generate high-quality traversable paths. Then, these paths are projected onto the global traversability map generated by the global BEV planning model to obtain the optimal waypoints. By integrating the traversability from both local and global BEV, we establish a dual-layer BEV heuristic planning paradigm, enabling long-distance navigation in unstructured outdoor environments. We test our approach through both public dataset evaluations and real-world robot deployments, yielding promising results. Compared to baselines, the Dual-BEV Nav improved temporal distance prediction accuracy by up to $18.7\%$. In the real-world deployment, under conditions significantly different from the training set and with notable occlusions in the global BEV, the Dual-BEV Nav successfully achieved a 65-meter-long outdoor navigation. Further analysis demonstrates that the local BEV representation significantly enhances the rationality of the planning, while the global BEV probability map ensures the robustness of the overall planning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18351v1-abstract-full').style.display = 'none'; document.getElementById('2501.18351v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.17711">arXiv:2501.17711</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.17711">pdf</a>, <a href="https://arxiv.org/format/2501.17711">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> STGCN-LSTM for Olympic Medal Prediction: Dynamic Power Modeling and Causal Policy Optimization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Yiquan Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+J">Jiaying Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jingyi Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+Z">Zihao Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.17711v1-abstract-short" style="display: inline;"> This paper proposes a novel hybrid model, STGCN-LSTM, to forecast Olympic medal distributions by integrating the spatio-temporal relationships among countries and the long-term dependencies of national performance. The Spatial-Temporal Graph Convolution Network (STGCN) captures geographic and interactive factors-such as coaching exchange and socio-economic links-while the Long Short-Term Memory (L&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.17711v1-abstract-full').style.display = 'inline'; document.getElementById('2501.17711v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.17711v1-abstract-full" style="display: none;"> This paper proposes a novel hybrid model, STGCN-LSTM, to forecast Olympic medal distributions by integrating the spatio-temporal relationships among countries and the long-term dependencies of national performance. The Spatial-Temporal Graph Convolution Network (STGCN) captures geographic and interactive factors-such as coaching exchange and socio-economic links-while the Long Short-Term Memory (LSTM) module models historical trends in medal counts, economic data, and demographics. To address zero-inflated outputs (i.e., the disparity between countries that consistently yield wins and those never having won medals), a Zero-Inflated Compound Poisson (ZICP) framework is incorporated to separate random zeros from structural zeros, providing a clearer view of potential breakthrough performances. Validation includes historical backtracking, policy shock simulations, and causal inference checks, confirming the robustness of the proposed method. Results shed light on the influence of coaching mobility, event specialization, and strategic investment on medal forecasts, offering a data-driven foundation for optimizing sports policies and resource allocation in diverse Olympic contexts. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.17711v1-abstract-full').style.display = 'none'; document.getElementById('2501.17711v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">18pages, 7figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.17161">arXiv:2501.17161</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.17161">pdf</a>, <a href="https://arxiv.org/format/2501.17161">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> SFT Memorizes, RL Generalizes: A Comparative Study of Foundation Model Post-training </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chu%2C+T">Tianzhe Chu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhai%2C+Y">Yuexiang Zhai</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jihan Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Tong%2C+S">Shengbang Tong</a>, <a href="/search/cs?searchtype=author&amp;query=Xie%2C+S">Saining Xie</a>, <a href="/search/cs?searchtype=author&amp;query=Schuurmans%2C+D">Dale Schuurmans</a>, <a href="/search/cs?searchtype=author&amp;query=Le%2C+Q+V">Quoc V. Le</a>, <a href="/search/cs?searchtype=author&amp;query=Levine%2C+S">Sergey Levine</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+Y">Yi Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.17161v1-abstract-short" style="display: inline;"> Supervised fine-tuning (SFT) and reinforcement learning (RL) are widely used post-training techniques for foundation models. However, their roles in enhancing model generalization capabilities remain unclear. This paper studies the difference between SFT and RL on generalization and memorization, focusing on text-based rule variants and visual variants. We introduce GeneralPoints, an arithmetic re&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.17161v1-abstract-full').style.display = 'inline'; document.getElementById('2501.17161v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.17161v1-abstract-full" style="display: none;"> Supervised fine-tuning (SFT) and reinforcement learning (RL) are widely used post-training techniques for foundation models. However, their roles in enhancing model generalization capabilities remain unclear. This paper studies the difference between SFT and RL on generalization and memorization, focusing on text-based rule variants and visual variants. We introduce GeneralPoints, an arithmetic reasoning card game, and adopt V-IRL, a real-world navigation environment, to assess how models trained with SFT and RL generalize to unseen variants in both textual and visual domains. We show that RL, especially when trained with an outcome-based reward, generalizes across both rule-based textual and visual variants. SFT, in contrast, tends to memorize training data and struggles to generalize out-of-distribution scenarios. Further analysis reveals that RL improves the model&#39;s underlying visual recognition capabilities, contributing to its enhanced generalization in the visual domain. Despite RL&#39;s superior generalization, we show that SFT remains essential for effective RL training; SFT stabilizes the model&#39;s output format, enabling subsequent RL to achieve its performance gains. These findings demonstrates the capability of RL for acquiring generalizable knowledge in complex, multi-modal tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.17161v1-abstract-full').style.display = 'none'; document.getElementById('2501.17161v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Website at https://tianzhechu.com/SFTvsRL</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.16376">arXiv:2501.16376</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.16376">pdf</a>, <a href="https://arxiv.org/format/2501.16376">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> HWPQ: Hessian-free Weight Pruning-Quantization For LLM Compression And Acceleration </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Kang%2C+Y">Yuhan Kang</a>, <a href="/search/cs?searchtype=author&amp;query=Luo%2C+Z">Zhongdi Luo</a>, <a href="/search/cs?searchtype=author&amp;query=Wen%2C+M">Mei Wen</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+Y">Yang Shi</a>, <a href="/search/cs?searchtype=author&amp;query=He%2C+J">Jun He</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jianchao Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Xue%2C+Z">Zeyu Xue</a>, <a href="/search/cs?searchtype=author&amp;query=Feng%2C+J">Jing Feng</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+X">Xinwang Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.16376v1-abstract-short" style="display: inline;"> Large Language Models (LLMs) have achieved remarkable success across numerous domains. However, the high time complexity of existing pruning and quantization methods significantly hinders their effective deployment on resource-constrained consumer or edge devices. In this study, we propose a novel Hessian-free Weight Pruning-Quantization (HWPQ) method. HWPQ eliminates the need for computationally&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.16376v1-abstract-full').style.display = 'inline'; document.getElementById('2501.16376v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.16376v1-abstract-full" style="display: none;"> Large Language Models (LLMs) have achieved remarkable success across numerous domains. However, the high time complexity of existing pruning and quantization methods significantly hinders their effective deployment on resource-constrained consumer or edge devices. In this study, we propose a novel Hessian-free Weight Pruning-Quantization (HWPQ) method. HWPQ eliminates the need for computationally intensive Hessian matrix calculations by introducing a contribution-based weight metric, which evaluates the importance of weights without relying on second-order derivatives. Additionally, we employ the Exponentially Weighted Moving Average (EWMA) technique to bypass weight sorting, enabling the selection of weights that contribute most to LLM accuracy and further reducing time complexity. Our approach is extended to support 2:4 structured sparsity pruning, facilitating efficient execution on modern hardware accelerators. Experimental results demonstrate that HWPQ significantly enhances the compression performance of LLaMA2. Compared to state-of-the-art quantization and pruning frameworks, HWPQ achieves average speedups of 5.97x (up to 20.75x) in quantization time and 12.29x (up to 56.02x) in pruning time, while largely preserving model accuracy. Furthermore, we observe a 1.50x inference speedup compared to the baseline. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.16376v1-abstract-full').style.display = 'none'; document.getElementById('2501.16376v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.16103">arXiv:2501.16103</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.16103">pdf</a>, <a href="https://arxiv.org/ps/2501.16103">ps</a>, <a href="https://arxiv.org/format/2501.16103">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Static Batching of Irregular Workloads on GPUs: Framework and Application to Efficient MoE Model Inference </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+Y">Yinghan Li</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Y">Yifei Li</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+J">Jiejing Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+B">Bujiao Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+X">Xiaotong Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Duan%2C+L">Lian Duan</a>, <a href="/search/cs?searchtype=author&amp;query=Jin%2C+Y">Yejun Jin</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Z">Zheng Li</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+X">Xuanyu Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+H">Haoyu Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+W">Wente Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Yajie Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jiacheng Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+P">Peiyang Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+L">Laiwen Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Yu%2C+W">Wenyuan Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.16103v1-abstract-short" style="display: inline;"> It has long been a problem to arrange and execute irregular workloads on massively parallel devices. We propose a general framework for statically batching irregular workloads into a single kernel with a runtime task mapping mechanism on GPUs. We further apply this framework to Mixture-of-Experts (MoE) model inference and implement an optimized and efficient CUDA kernel. Our MoE kernel achieves up&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.16103v1-abstract-full').style.display = 'inline'; document.getElementById('2501.16103v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.16103v1-abstract-full" style="display: none;"> It has long been a problem to arrange and execute irregular workloads on massively parallel devices. We propose a general framework for statically batching irregular workloads into a single kernel with a runtime task mapping mechanism on GPUs. We further apply this framework to Mixture-of-Experts (MoE) model inference and implement an optimized and efficient CUDA kernel. Our MoE kernel achieves up to 91% of the peak Tensor Core throughput on NVIDIA H800 GPU and 95% on NVIDIA H20 GPU. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.16103v1-abstract-full').style.display = 'none'; document.getElementById('2501.16103v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 pages</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> D.1.3; I.2.6 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.15418">arXiv:2501.15418</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.15418">pdf</a>, <a href="https://arxiv.org/format/2501.15418">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Episodic Novelty Through Temporal Distance </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Jiang%2C+Y">Yuhua Jiang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Q">Qihan Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+Y">Yiqin Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+X">Xiaoteng Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Zhong%2C+D">Dianyu Zhong</a>, <a href="/search/cs?searchtype=author&amp;query=Hu%2C+H">Hao Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jun Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Liang%2C+B">Bin Liang</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+B">Bo Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+C">Chongjie Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+Q">Qianchuan Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.15418v1-abstract-short" style="display: inline;"> Exploration in sparse reward environments remains a significant challenge in reinforcement learning, particularly in Contextual Markov Decision Processes (CMDPs), where environments differ across episodes. Existing episodic intrinsic motivation methods for CMDPs primarily rely on count-based approaches, which are ineffective in large state spaces, or on similarity-based methods that lack appropria&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15418v1-abstract-full').style.display = 'inline'; document.getElementById('2501.15418v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.15418v1-abstract-full" style="display: none;"> Exploration in sparse reward environments remains a significant challenge in reinforcement learning, particularly in Contextual Markov Decision Processes (CMDPs), where environments differ across episodes. Existing episodic intrinsic motivation methods for CMDPs primarily rely on count-based approaches, which are ineffective in large state spaces, or on similarity-based methods that lack appropriate metrics for state comparison. To address these shortcomings, we propose Episodic Novelty Through Temporal Distance (ETD), a novel approach that introduces temporal distance as a robust metric for state similarity and intrinsic reward computation. By employing contrastive learning, ETD accurately estimates temporal distances and derives intrinsic rewards based on the novelty of states within the current episode. Extensive experiments on various benchmark tasks demonstrate that ETD significantly outperforms state-of-the-art methods, highlighting its effectiveness in enhancing exploration in sparse reward CMDPs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15418v1-abstract-full').style.display = 'none'; document.getElementById('2501.15418v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICLR2025</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Yang%2C+J&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Yang%2C+J&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Yang%2C+J&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Yang%2C+J&amp;start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Yang%2C+J&amp;start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Yang%2C+J&amp;start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">&hellip;</span></li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

Pages: 1 2 3 4 5 6 7 8 9 10