<!-- Scraper artifact (pre-document residue, kept for provenance): "CINXE.COM" / "Search | arXiv e-print repository". Wrapped in a comment because raw text before <!DOCTYPE html> is invalid and forces quirks mode; comments here are permitted. -->
<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script 
src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. 
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" 
role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;50 of 420 results for author: <span class="mathjax">Ma, M</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&amp;query=Ma%2C+M">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Ma, M"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Ma%2C+M&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option 
value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Ma, M"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Ma%2C+M&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Ma%2C+M&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Ma%2C+M&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Ma%2C+M&amp;start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Ma%2C+M&amp;start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Ma%2C+M&amp;start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">&hellip;</span></li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.17638">arXiv:2502.17638</a> <span>&nbsp;[<a 
href="https://arxiv.org/pdf/2502.17638">pdf</a>, <a href="https://arxiv.org/format/2502.17638">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Towards Robust Legal Reasoning: Harnessing Logical LLMs in Law </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Kant%2C+M">Manuj Kant</a>, <a href="/search/cs?searchtype=author&amp;query=Nabi%2C+S">Sareh Nabi</a>, <a href="/search/cs?searchtype=author&amp;query=Kant%2C+M">Manav Kant</a>, <a href="/search/cs?searchtype=author&amp;query=Scharrer%2C+R">Roland Scharrer</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+M">Megan Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Nabi%2C+M">Marzieh Nabi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.17638v1-abstract-short" style="display: inline;"> Legal services rely heavily on text processing. While large language models (LLMs) show promise, their application in legal contexts demands higher accuracy, repeatability, and transparency. Logic programs, by encoding legal concepts as structured rules and facts, offer reliable automation, but require sophisticated text extraction. 
We propose a neuro-symbolic approach that integrates LLMs&#39; natura&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.17638v1-abstract-full').style.display = 'inline'; document.getElementById('2502.17638v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.17638v1-abstract-full" style="display: none;"> Legal services rely heavily on text processing. While large language models (LLMs) show promise, their application in legal contexts demands higher accuracy, repeatability, and transparency. Logic programs, by encoding legal concepts as structured rules and facts, offer reliable automation, but require sophisticated text extraction. We propose a neuro-symbolic approach that integrates LLMs&#39; natural language understanding with logic-based reasoning to address these limitations. As a legal document case study, we applied neuro-symbolic AI to coverage-related queries in insurance contracts using both closed and open-source LLMs. While LLMs have improved in legal reasoning, they still lack the accuracy and consistency required for complex contract analysis. In our analysis, we tested three methodologies to evaluate whether a specific claim is covered under a contract: a vanilla LLM, an unguided approach that leverages LLMs to encode both the contract and the claim, and a guided approach that uses a framework for the LLM to encode the contract. We demonstrated the promising capabilities of LLM + Logic in the guided approach. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.17638v1-abstract-full').style.display = 'none'; document.getElementById('2502.17638v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.15296">arXiv:2502.15296</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.15296">pdf</a>, <a href="https://arxiv.org/format/2502.15296">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Beyond Fixed Variables: Expanding-variate Time Series Forecasting via Flat Scheme and Spatio-temporal Focal Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Ma%2C+M">Minbo Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Tang%2C+K">Kai Tang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Huan Li</a>, <a href="/search/cs?searchtype=author&amp;query=Teng%2C+F">Fei Teng</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+D">Dalin Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+T">Tianrui Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.15296v1-abstract-short" style="display: inline;"> Multivariate Time Series Forecasting (MTSF) has long been a key research focus. 
Traditionally, these studies assume a fixed number of variables, but in real-world applications, Cyber-Physical Systems often expand as new sensors are deployed, increasing variables in MTSF. In light of this, we introduce a novel task, Expanding-variate Time Series Forecasting (EVTSF). This task presents unique challe&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.15296v1-abstract-full').style.display = 'inline'; document.getElementById('2502.15296v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.15296v1-abstract-full" style="display: none;"> Multivariate Time Series Forecasting (MTSF) has long been a key research focus. Traditionally, these studies assume a fixed number of variables, but in real-world applications, Cyber-Physical Systems often expand as new sensors are deployed, increasing variables in MTSF. In light of this, we introduce a novel task, Expanding-variate Time Series Forecasting (EVTSF). This task presents unique challenges, specifically (1) handling inconsistent data shapes caused by adding new variables, and (2) addressing imbalanced spatio-temporal learning, where expanding variables have limited observed data due to the necessity for timely operation. To address these challenges, we propose STEV, a flexible spatio-temporal forecasting framework. STEV includes a new Flat Scheme to tackle the inconsistent data shape issue, which extends the graph-based spatio-temporal modeling architecture into 1D space by flattening the 2D samples along the variable dimension, making the model variable-scale-agnostic while still preserving dynamic spatial correlations through a holistic graph. 
We introduce a novel Spatio-temporal Focal Learning strategy that incorporates a negative filter to resolve potential conflicts between contrastive learning and graph representation, and a focal contrastive loss as its core to guide the framework to focus on optimizing the expanding variables. We benchmark EVTSF performance using three real-world datasets and compare it against three potential solutions employing SOTA MTSF models tailored for EVSTF. Experimental results show that STEV significantly outperforms its competitors, particularly on expanding variables. Notably, STEV, with only 5% of observations from the expanding period, is on par with SOTA MTSF models trained with complete observations. Further exploration of various expanding strategies underscores the generalizability of STEV in real-world applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.15296v1-abstract-full').style.display = 'none'; document.getElementById('2502.15296v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11900">arXiv:2502.11900</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.11900">pdf</a>, <a href="https://arxiv.org/format/2502.11900">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Ansatz-free Hamiltonian learning with Heisenberg-limited scaling </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Hu%2C+H">Hong-Ye Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+M">Muzhou Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Gong%2C+W">Weiyuan Gong</a>, <a href="/search/cs?searchtype=author&amp;query=Ye%2C+Q">Qi Ye</a>, <a href="/search/cs?searchtype=author&amp;query=Tong%2C+Y">Yu Tong</a>, <a href="/search/cs?searchtype=author&amp;query=Flammia%2C+S+T">Steven T. Flammia</a>, <a href="/search/cs?searchtype=author&amp;query=Yelin%2C+S+F">Susanne F. Yelin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.11900v1-abstract-short" style="display: inline;"> Learning the unknown interactions that govern a quantum system is crucial for quantum information processing, device benchmarking, and quantum sensing. The problem, known as Hamiltonian learning, is well understood under the assumption that interactions are local, but this assumption may not hold for arbitrary Hamiltonians. 
Previous methods all require high-order inverse polynomial dependency with&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11900v1-abstract-full').style.display = 'inline'; document.getElementById('2502.11900v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11900v1-abstract-full" style="display: none;"> Learning the unknown interactions that govern a quantum system is crucial for quantum information processing, device benchmarking, and quantum sensing. The problem, known as Hamiltonian learning, is well understood under the assumption that interactions are local, but this assumption may not hold for arbitrary Hamiltonians. Previous methods all require high-order inverse polynomial dependency with precision, unable to surpass the standard quantum limit and reach the gold standard Heisenberg-limited scaling. Whether Heisenberg-limited Hamiltonian learning is possible without prior assumptions about the interaction structures, a challenge we term \emph{ansatz-free Hamiltonian learning}, remains an open question. In this work, we present a quantum algorithm to learn arbitrary sparse Hamiltonians without any structure constraints using only black-box queries of the system&#39;s real-time evolution and minimal digital controls to attain Heisenberg-limited scaling in estimation error. Our method is also resilient to state-preparation-and-measurement errors, enhancing its practical feasibility. Moreover, we establish a fundamental trade-off between total evolution time and quantum control on learning arbitrary interactions, revealing the intrinsic interplay between controllability and total evolution time complexity for any learning algorithm. 
These results pave the way for further exploration into Heisenberg-limited Hamiltonian learning in complex quantum systems under minimal assumptions, potentially enabling new benchmarking and verification protocols. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11900v1-abstract-full').style.display = 'none'; document.getElementById('2502.11900v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, 1 figure with Supplementary Materials (17 pages, 1 figure). HYH and MM contributed equally</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11413">arXiv:2502.11413</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.11413">pdf</a>, <a href="https://arxiv.org/format/2502.11413">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Statistical Query Hardness of Multiclass Linear Classification with Random Classification Noise </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Diakonikolas%2C+I">Ilias Diakonikolas</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+M">Mingchen Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Ren%2C+L">Lisheng Ren</a>, <a 
href="/search/cs?searchtype=author&amp;query=Tzamos%2C+C">Christos Tzamos</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.11413v1-abstract-short" style="display: inline;"> We study the task of Multiclass Linear Classification (MLC) in the distribution-free PAC model with Random Classification Noise (RCN). Specifically, the learner is given a set of labeled examples $(x, y)$, where $x$ is drawn from an unknown distribution on $R^d$ and the labels are generated by a multiclass linear classifier corrupted with RCN. That is, the label $y$ is flipped from $i$ to $j$ with&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11413v1-abstract-full').style.display = 'inline'; document.getElementById('2502.11413v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11413v1-abstract-full" style="display: none;"> We study the task of Multiclass Linear Classification (MLC) in the distribution-free PAC model with Random Classification Noise (RCN). Specifically, the learner is given a set of labeled examples $(x, y)$, where $x$ is drawn from an unknown distribution on $R^d$ and the labels are generated by a multiclass linear classifier corrupted with RCN. That is, the label $y$ is flipped from $i$ to $j$ with probability $H_{ij}$ according to a known noise matrix $H$ with non-negative separation $\sigma := \min_{i \neq j} H_{ii}-H_{ij}$. The goal is to compute a hypothesis with small 0-1 error. For the special case of two labels, prior work has given polynomial-time algorithms achieving the optimal error. Surprisingly, little is known about the complexity of this task even for three labels. As our main contribution, we show that the complexity of MLC with RCN becomes drastically different in the presence of three or more labels. 
Specifically, we prove super-polynomial Statistical Query (SQ) lower bounds for this problem. In more detail, even for three labels and constant separation, we give a super-polynomial lower bound on the complexity of any SQ algorithm achieving optimal error. For a larger number of labels and smaller separation, we show a super-polynomial SQ lower bound even for the weaker goal of achieving any constant factor approximation to the optimal loss or even beating the trivial hypothesis. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11413v1-abstract-full').style.display = 'none'; document.getElementById('2502.11413v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.10807">arXiv:2502.10807</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.10807">pdf</a>, <a href="https://arxiv.org/format/2502.10807">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> </div> </div> <p class="title is-5 mathjax"> HybriDNA: A Hybrid Transformer-Mamba2 Long-Range DNA Language Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Ma%2C+M">Mingqian Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+G">Guoqing Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Cao%2C+C">Chuan 
Cao</a>, <a href="/search/cs?searchtype=author&amp;query=Deng%2C+P">Pan Deng</a>, <a href="/search/cs?searchtype=author&amp;query=Dao%2C+T">Tri Dao</a>, <a href="/search/cs?searchtype=author&amp;query=Gu%2C+A">Albert Gu</a>, <a href="/search/cs?searchtype=author&amp;query=Jin%2C+P">Peiran Jin</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+Z">Zhao Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Xia%2C+Y">Yingce Xia</a>, <a href="/search/cs?searchtype=author&amp;query=Luo%2C+R">Renqian Luo</a>, <a href="/search/cs?searchtype=author&amp;query=Hu%2C+P">Pipi Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Z">Zun Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Y">Yuan-Jyue Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+H">Haiguang Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Qin%2C+T">Tao Qin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.10807v2-abstract-short" style="display: inline;"> Advances in natural language processing and large language models have sparked growing interest in modeling DNA, often referred to as the &#34;language of life&#34;. However, DNA modeling poses unique challenges. First, it requires the ability to process ultra-long DNA sequences while preserving single-nucleotide resolution, as individual nucleotides play a critical role in DNA function. 
Second, success i&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10807v2-abstract-full').style.display = 'inline'; document.getElementById('2502.10807v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.10807v2-abstract-full" style="display: none;"> Advances in natural language processing and large language models have sparked growing interest in modeling DNA, often referred to as the &#34;language of life&#34;. However, DNA modeling poses unique challenges. First, it requires the ability to process ultra-long DNA sequences while preserving single-nucleotide resolution, as individual nucleotides play a critical role in DNA function. Second, success in this domain requires excelling at both generative and understanding tasks: generative tasks hold potential for therapeutic and industrial applications, while understanding tasks provide crucial insights into biological mechanisms and diseases. To address these challenges, we propose HybriDNA, a decoder-only DNA language model that incorporates a hybrid Transformer-Mamba2 architecture, seamlessly integrating the strengths of attention mechanisms with selective state-space models. This hybrid design enables HybriDNA to efficiently process DNA sequences up to 131kb in length with single-nucleotide resolution. HybriDNA achieves state-of-the-art performance across 33 DNA understanding datasets curated from the BEND, GUE, and LRB benchmarks, and demonstrates exceptional capability in generating synthetic cis-regulatory elements (CREs) with desired properties. Furthermore, we show that HybriDNA adheres to expected scaling laws, with performance improving consistently as the model scales from 300M to 3B and 7B parameters. 
These findings underscore HybriDNA&#39;s versatility and its potential to advance DNA research and applications, paving the way for innovations in understanding and engineering the &#34;language of life&#34;. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10807v2-abstract-full').style.display = 'none'; document.getElementById('2502.10807v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Project page: https://hybridna-project.github.io/HybriDNA-Project/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.10284">arXiv:2502.10284</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.10284">pdf</a>, <a href="https://arxiv.org/format/2502.10284">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3701716.3715208">10.1145/3701716.3715208 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> A Hybrid Cross-Stage Coordination Pre-ranking Model for Online 
Recommendation Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+B">Binglei Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Qi%2C+H">Houying Qi</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+G">Guang Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+M">Mian Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+X">Xiwei Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Mei%2C+F">Feng Mei</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+S">Sulong Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Hu%2C+J">Jinghe Hu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.10284v1-abstract-short" style="display: inline;"> Large-scale recommendation systems often adopt cascading architecture consisting of retrieval, pre-ranking, ranking, and re-ranking stages. With strict latency requirements, pre-ranking utilizes lightweight models to perform a preliminary selection from massive retrieved candidates. However, recent works focus solely on improving consistency with ranking, relying exclusively on downstream stages.&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10284v1-abstract-full').style.display = 'inline'; document.getElementById('2502.10284v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.10284v1-abstract-full" style="display: none;"> Large-scale recommendation systems often adopt cascading architecture consisting of retrieval, pre-ranking, ranking, and re-ranking stages. With strict latency requirements, pre-ranking utilizes lightweight models to perform a preliminary selection from massive retrieved candidates. 
However, recent works focus solely on improving consistency with ranking, relying exclusively on downstream stages. Since downstream input is derived from the pre-ranking output, they will exacerbate the sample selection bias (SSB) issue and Matthew effect, leading to sub-optimal results. To address the limitation, we propose a novel Hybrid Cross-Stage Coordination Pre-ranking model (HCCP) to integrate information from upstream (retrieval) and downstream (ranking, re-ranking) stages. Specifically, cross-stage coordination refers to the pre-ranking&#39;s adaptability to the entire stream and the role of serving as a more effective bridge between upstream and downstream. HCCP consists of Hybrid Sample Construction and Hybrid Objective Optimization. Hybrid sample construction captures multi-level unexposed data from the entire stream and rearranges them to become the optimal guiding &#34;ground truth&#34; for pre-ranking learning. Hybrid objective optimization contains the joint optimization of consistency and long-tail precision through our proposed Margin InfoNCE loss. It is specifically designed to learn from such hybrid unexposed samples, improving the overall performance and mitigating the SSB issue. The appendix describes a proof of the efficacy of the proposed loss in selecting potential positives. Extensive offline and online experiments indicate that HCCP outperforms SOTA methods by improving cross-stage coordination. It contributes up to 14.9% UCVR and 1.3% UCTR in the JD E-commerce recommendation system. Concerning code privacy, we provide a pseudocode for reference. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10284v1-abstract-full').style.display = 'none'; document.getElementById('2502.10284v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by WWW 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09955">arXiv:2502.09955</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.09955">pdf</a>, <a href="https://arxiv.org/format/2502.09955">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Diverse Inference and Verification for Advanced Reasoning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Drori%2C+I">Iddo Drori</a>, <a href="/search/cs?searchtype=author&amp;query=Longhitano%2C+G">Gaston Longhitano</a>, <a href="/search/cs?searchtype=author&amp;query=Mao%2C+M">Mao Mao</a>, <a href="/search/cs?searchtype=author&amp;query=Hyun%2C+S">Seunghwan Hyun</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Y">Yuke Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Park%2C+S">Sungjun Park</a>, <a href="/search/cs?searchtype=author&amp;query=Meeks%2C+Z">Zachary Meeks</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+X">Xin-Yu Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Segev%2C+B">Ben Segev</a>, <a 
href="/search/cs?searchtype=author&amp;query=Yong%2C+H">Howard Yong</a>, <a href="/search/cs?searchtype=author&amp;query=Verma%2C+N">Nakul Verma</a>, <a href="/search/cs?searchtype=author&amp;query=Shporer%2C+A">Avi Shporer</a>, <a href="/search/cs?searchtype=author&amp;query=Amit%2C+A">Alon Amit</a>, <a href="/search/cs?searchtype=author&amp;query=Udell%2C+M">Madeleine Udell</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09955v1-abstract-short" style="display: inline;"> Reasoning LLMs such as OpenAI o1, o3 and DeepSeek R1 have made significant progress in mathematics and coding, yet find challenging advanced tasks such as International Mathematical Olympiad (IMO) combinatorics problems, Abstraction and Reasoning Corpus (ARC) puzzles, and Humanity&#39;s Last Exam (HLE) questions. We use a diverse inference approach that combines multiple models and methods at test tim&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09955v1-abstract-full').style.display = 'inline'; document.getElementById('2502.09955v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09955v1-abstract-full" style="display: none;"> Reasoning LLMs such as OpenAI o1, o3 and DeepSeek R1 have made significant progress in mathematics and coding, yet find challenging advanced tasks such as International Mathematical Olympiad (IMO) combinatorics problems, Abstraction and Reasoning Corpus (ARC) puzzles, and Humanity&#39;s Last Exam (HLE) questions. We use a diverse inference approach that combines multiple models and methods at test time. We find that verifying mathematics and code problems, and rejection sampling on other problems is simple and effective. 
We automatically verify correctness of solutions to IMO problems by Lean, and ARC puzzles by code, and find that best-of-N effectively answers HLE questions. Our approach increases answer accuracy on IMO combinatorics problems from 33.3% to 77.8%, accuracy on HLE questions from 8% to 37%, and solves 80% of ARC puzzles that 948 humans could not and 26.5% of ARC puzzles that o3 high compute does not. Test-time simulations, reinforcement learning, and meta-learning with inference feedback improve generalization by adapting agent graph representations and varying prompts, code, and datasets. Our approach is reliable, robust, and scalable, and in the spirit of reproducible research, we will make it publicly available upon publication. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09955v1-abstract-full').style.display = 'none'; document.getElementById('2502.09955v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">165 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.07527">arXiv:2502.07527</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.07527">pdf</a>, <a href="https://arxiv.org/format/2502.07527">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> NatureLM: Deciphering the Language of Nature for Scientific Discovery </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Xia%2C+Y">Yingce Xia</a>, <a href="/search/cs?searchtype=author&amp;query=Jin%2C+P">Peiran Jin</a>, <a href="/search/cs?searchtype=author&amp;query=Xie%2C+S">Shufang Xie</a>, <a href="/search/cs?searchtype=author&amp;query=He%2C+L">Liang He</a>, <a href="/search/cs?searchtype=author&amp;query=Cao%2C+C">Chuan Cao</a>, <a href="/search/cs?searchtype=author&amp;query=Luo%2C+R">Renqian Luo</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+G">Guoqing Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Yue Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Z">Zequn Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Y">Yuan-Jyue Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+Z">Zekun Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Bai%2C+Y">Yeqi Bai</a>, <a href="/search/cs?searchtype=author&amp;query=Deng%2C+P">Pan Deng</a>, <a href="/search/cs?searchtype=author&amp;query=Min%2C+Y">Yaosen Min</a>, <a 
href="/search/cs?searchtype=author&amp;query=Lu%2C+Z">Ziheng Lu</a>, <a href="/search/cs?searchtype=author&amp;query=Hao%2C+H">Hongxia Hao</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+H">Han Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jielan Li</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+C">Chang Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+J">Jia Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+J">Jianwei Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+K">Kehan Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+W">Wei Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Gao%2C+K">Kaiyuan Gao</a>, <a href="/search/cs?searchtype=author&amp;query=Pei%2C+Q">Qizhi Pei</a> , et al. (20 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.07527v1-abstract-short" style="display: inline;"> Foundation models have revolutionized natural language processing and artificial intelligence, significantly enhancing how machines comprehend and generate human languages. Inspired by the success of these foundation models, researchers have developed foundation models for individual scientific domains, including small molecules, materials, proteins, DNA, and RNA. 
However, these models are typical&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07527v1-abstract-full').style.display = 'inline'; document.getElementById('2502.07527v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.07527v1-abstract-full" style="display: none;"> Foundation models have revolutionized natural language processing and artificial intelligence, significantly enhancing how machines comprehend and generate human languages. Inspired by the success of these foundation models, researchers have developed foundation models for individual scientific domains, including small molecules, materials, proteins, DNA, and RNA. However, these models are typically trained in isolation, lacking the ability to integrate across different scientific domains. Recognizing that entities within these domains can all be represented as sequences, which together form the &#34;language of nature&#34;, we introduce Nature Language Model (briefly, NatureLM), a sequence-based science foundation model designed for scientific discovery. Pre-trained with data from multiple scientific domains, NatureLM offers a unified, versatile model that enables various applications including: (i) generating and optimizing small molecules, proteins, RNA, and materials using text instructions; (ii) cross-domain generation/design, such as protein-to-molecule and protein-to-RNA generation; and (iii) achieving state-of-the-art performance in tasks like SMILES-to-IUPAC translation and retrosynthesis on USPTO-50k. NatureLM offers a promising generalist approach for various scientific tasks, including drug discovery (hit generation/optimization, ADMET optimization, synthesis), novel material design, and the development of therapeutic proteins or nucleotides. 
We have developed NatureLM models in different sizes (1 billion, 8 billion, and 46.7 billion parameters) and observed a clear improvement in performance as the model size increases. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07527v1-abstract-full').style.display = 'none'; document.getElementById('2502.07527v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">81 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.07243">arXiv:2502.07243</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.07243">pdf</a>, <a href="https://arxiv.org/format/2502.07243">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Vevo: Controllable Zero-Shot Voice Imitation with Self-Supervised Disentanglement </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+X">Xueyao Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+X">Xiaohui Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Peng%2C+K">Kainan Peng</a>, <a href="/search/cs?searchtype=author&amp;query=Tang%2C+Z">Zhenyu Tang</a>, <a href="/search/cs?searchtype=author&amp;query=Manohar%2C+V">Vimal Manohar</a>, <a 
href="/search/cs?searchtype=author&amp;query=Liu%2C+Y">Yingru Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Hwang%2C+J">Jeff Hwang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Dangna Li</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Yuhao Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Chan%2C+J">Julian Chan</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+Y">Yuan Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+Z">Zhizheng Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+M">Mingbo Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.07243v1-abstract-short" style="display: inline;"> The imitation of voice, targeted on specific speech attributes such as timbre and speaking style, is crucial in speech generation. However, existing methods rely heavily on annotated data, and struggle with effectively disentangling timbre and style, leading to challenges in achieving controllable generation, especially in zero-shot scenarios. To address these issues, we propose Vevo, a versatile&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07243v1-abstract-full').style.display = 'inline'; document.getElementById('2502.07243v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.07243v1-abstract-full" style="display: none;"> The imitation of voice, targeted on specific speech attributes such as timbre and speaking style, is crucial in speech generation. However, existing methods rely heavily on annotated data, and struggle with effectively disentangling timbre and style, leading to challenges in achieving controllable generation, especially in zero-shot scenarios. 
To address these issues, we propose Vevo, a versatile zero-shot voice imitation framework with controllable timbre and style. Vevo operates in two core stages: (1) Content-Style Modeling: Given either text or speech&#39;s content tokens as input, we utilize an autoregressive transformer to generate the content-style tokens, which is prompted by a style reference; (2) Acoustic Modeling: Given the content-style tokens as input, we employ a flow-matching transformer to produce acoustic representations, which is prompted by a timbre reference. To obtain the content and content-style tokens of speech, we design a fully self-supervised approach that progressively decouples the timbre, style, and linguistic content of speech. Specifically, we adopt VQ-VAE as the tokenizer for the continuous hidden features of HuBERT. We treat the vocabulary size of the VQ-VAE codebook as the information bottleneck, and adjust it carefully to obtain the disentangled speech representations. Solely self-supervised trained on 60K hours of audiobook speech data, without any fine-tuning on style-specific corpora, Vevo matches or surpasses existing methods in accent and emotion conversion tasks. Additionally, Vevo&#39;s effectiveness in zero-shot voice conversion and text-to-speech tasks further demonstrates its strong generalization and versatility. Audio samples are available at https://versavoice.github.io. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07243v1-abstract-full').style.display = 'none'; document.getElementById('2502.07243v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ICLR 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.06130">arXiv:2502.06130</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.06130">pdf</a>, <a href="https://arxiv.org/format/2502.06130">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Self-Correcting Decoding with Generative Feedback for Mitigating Hallucinations in Large Vision-Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+C">Ce Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Wan%2C+Z">Zifu Wan</a>, <a href="/search/cs?searchtype=author&amp;query=Kan%2C+Z">Zhehan Kan</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+M+Q">Martin Q. 
Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Stepputtis%2C+S">Simon Stepputtis</a>, <a href="/search/cs?searchtype=author&amp;query=Ramanan%2C+D">Deva Ramanan</a>, <a href="/search/cs?searchtype=author&amp;query=Salakhutdinov%2C+R">Russ Salakhutdinov</a>, <a href="/search/cs?searchtype=author&amp;query=Morency%2C+L">Louis-Philippe Morency</a>, <a href="/search/cs?searchtype=author&amp;query=Sycara%2C+K">Katia Sycara</a>, <a href="/search/cs?searchtype=author&amp;query=Xie%2C+Y">Yaqi Xie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.06130v1-abstract-short" style="display: inline;"> While recent Large Vision-Language Models (LVLMs) have shown remarkable performance in multi-modal tasks, they are prone to generating hallucinatory text responses that do not align with the given visual input, which restricts their practical applicability in real-world scenarios. In this work, inspired by the observation that the text-to-image generation process is the inverse of image-conditione&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06130v1-abstract-full').style.display = 'inline'; document.getElementById('2502.06130v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.06130v1-abstract-full" style="display: none;"> While recent Large Vision-Language Models (LVLMs) have shown remarkable performance in multi-modal tasks, they are prone to generating hallucinatory text responses that do not align with the given visual input, which restricts their practical applicability in real-world scenarios. 
In this work, inspired by the observation that the text-to-image generation process is the inverse of image-conditioned response generation in LVLMs, we explore the potential of leveraging text-to-image generative models to assist in mitigating hallucinations in LVLMs. We discover that generative models can offer valuable self-feedback for mitigating hallucinations at both the response and token levels. Building on this insight, we introduce self-correcting Decoding with Generative Feedback (DeGF), a novel training-free algorithm that incorporates feedback from text-to-image generative models into the decoding process to effectively mitigate hallucinations in LVLMs. Specifically, DeGF generates an image from the initial response produced by LVLMs, which acts as an auxiliary visual reference and provides self-feedback to verify and correct the initial response through complementary or contrastive decoding. Extensive experimental results validate the effectiveness of our approach in mitigating diverse types of hallucinations, consistently surpassing state-of-the-art methods across six benchmarks. Code is available at https://github.com/zhangce01/DeGF. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06130v1-abstract-full').style.display = 'none'; document.getElementById('2502.06130v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ICLR 2025. 
Project page:https://zhangce01.github.io/DeGF/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05482">arXiv:2502.05482</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.05482">pdf</a>, <a href="https://arxiv.org/format/2502.05482">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Robustifying Fourier Features Embeddings for Implicit Neural Representations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Ma%2C+M">Mingze Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+Q">Qingtian Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhan%2C+Y">Yifan Zhan</a>, <a href="/search/cs?searchtype=author&amp;query=Yin%2C+Z">Zhengwei Yin</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+H">Hongjun Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+Y">Yinqiang Zheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05482v1-abstract-short" style="display: inline;"> Implicit Neural Representations (INRs) employ neural networks to represent continuous functions by mapping coordinates to the corresponding values of the target function, with applications e.g., inverse graphics. However, INRs face a challenge known as spectral bias when dealing with scenes containing varying frequencies. 
To overcome spectral bias, the most common approach is the Fourier features-&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05482v1-abstract-full').style.display = 'inline'; document.getElementById('2502.05482v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05482v1-abstract-full" style="display: none;"> Implicit Neural Representations (INRs) employ neural networks to represent continuous functions by mapping coordinates to the corresponding values of the target function, with applications e.g., inverse graphics. However, INRs face a challenge known as spectral bias when dealing with scenes containing varying frequencies. To overcome spectral bias, the most common approach is the Fourier features-based methods such as positional encoding. However, Fourier features-based methods will introduce noise to output, which degrades their performances when applied to downstream tasks. In response, this paper initially hypothesizes that combining multi-layer perceptrons (MLPs) with Fourier feature embeddings mutually enhances their strengths, yet simultaneously introduces limitations inherent in Fourier feature embeddings. By presenting a simple theorem, we validate our hypothesis, which serves as a foundation for the design of our solution. Leveraging these insights, we propose the use of multi-layer perceptrons (MLPs) without additive <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05482v1-abstract-full').style.display = 'none'; document.getElementById('2502.05482v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.04623">arXiv:2502.04623</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.04623">pdf</a>, <a href="https://arxiv.org/format/2502.04623">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> HetSSNet: Spatial-Spectral Heterogeneous Graph Learning Network for Panchromatic and Multispectral Images Fusion </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Ma%2C+M">Mengting Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Jiang%2C+Y">Yizhen Jiang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+M">Mengjiao Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jiaxin Li</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+W">Wei Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.04623v1-abstract-short" style="display: inline;"> Remote sensing pansharpening aims to reconstruct spatial-spectral properties during the fusion of panchromatic (PAN) images and low-resolution multi-spectral (LR-MS) images, finally generating the high-resolution multi-spectral (HR-MS) images. In the mainstream modeling strategies, i.e., CNN and Transformer, the input images are treated as the equal-sized grid of pixels in the Euclidean space. 
The&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04623v1-abstract-full').style.display = 'inline'; document.getElementById('2502.04623v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.04623v1-abstract-full" style="display: none;"> Remote sensing pansharpening aims to reconstruct spatial-spectral properties during the fusion of panchromatic (PAN) images and low-resolution multi-spectral (LR-MS) images, finally generating the high-resolution multi-spectral (HR-MS) images. In the mainstream modeling strategies, i.e., CNN and Transformer, the input images are treated as the equal-sized grid of pixels in the Euclidean space. They have limitations in facing remote sensing images with irregular ground objects. Graph is the more flexible structure, however, there are two major challenges when modeling spatial-spectral properties with graph: \emph{1) constructing the customized graph structure for spatial-spectral relationship priors}; \emph{2) learning the unified spatial-spectral representation through the graph}. To address these challenges, we propose the spatial-spectral heterogeneous graph learning network, named \textbf{HetSSNet}. Specifically, HetSSNet initially constructs the heterogeneous graph structure for pansharpening, which explicitly describes pansharpening-specific relationships. Subsequently, the basic relationship pattern generation module is designed to extract the multiple relationship patterns from the heterogeneous graph. Finally, relationship pattern aggregation module is exploited to collaboratively learn unified spatial-spectral representation across different relationships among nodes with adaptive importance learning from local and global perspectives. Extensive experiments demonstrate the significant superiority and generalization of HetSSNet. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04623v1-abstract-full').style.display = 'none'; document.getElementById('2502.04623v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.17338">arXiv:2501.17338</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.17338">pdf</a>, <a href="https://arxiv.org/format/2501.17338">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Inferring from Logits: Exploring Best Practices for Decoding-Free Generative Candidate Selection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Ma%2C+M+D">Mingyu Derek Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Ding%2C+Y">Yanna Ding</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+Z">Zijie Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Gao%2C+J">Jianxi Gao</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+Y">Yizhou Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+W">Wei Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.17338v1-abstract-short" 
style="display: inline;"> Generative Language Models rely on autoregressive decoding to produce the output sequence token by token. Many tasks, such as preference optimization, require the model to produce task-level output consisting of multiple tokens directly by selecting candidates from a pool as predictions. Determining a task-level prediction from candidates using the ordinary token-level decoding mechanism is constra&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.17338v1-abstract-full').style.display = 'inline'; document.getElementById('2501.17338v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.17338v1-abstract-full" style="display: none;"> Generative Language Models rely on autoregressive decoding to produce the output sequence token by token. Many tasks, such as preference optimization, require the model to produce task-level output consisting of multiple tokens directly by selecting candidates from a pool as predictions. Determining a task-level prediction from candidates using the ordinary token-level decoding mechanism is constrained by time-consuming decoding and interrupted gradients by discrete token selection. Existing works have been using decoding-free candidate selection methods to obtain candidate probability from initial output logits over vocabulary. Though these estimation methods are widely used, they are not systematically evaluated, especially on end tasks. We introduce an evaluation of a comprehensive collection of decoding-free candidate selection approaches on a comprehensive set of tasks, including five multiple-choice QA tasks with a small candidate pool and four clinical decision tasks with a massive amount of candidates, some with 10k+ options. We evaluate the estimation methods paired with a wide spectrum of foundation LMs covering different architectures, sizes and training paradigms. 
The results and insights from our analysis inform the future model design. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.17338v1-abstract-full').style.display = 'none'; document.getElementById('2501.17338v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.17326">arXiv:2501.17326</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.17326">pdf</a>, <a href="https://arxiv.org/format/2501.17326">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Memorize and Rank: Elevating Large Language Models for Clinical Diagnosis Prediction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Ma%2C+M+D">Mingyu Derek Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+X">Xiaoxuan Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Xiao%2C+Y">Yijia Xiao</a>, <a href="/search/cs?searchtype=author&amp;query=Cuturrufo%2C+A">Anthony Cuturrufo</a>, <a href="/search/cs?searchtype=author&amp;query=Nori%2C+V+S">Vijay S Nori</a>, <a href="/search/cs?searchtype=author&amp;query=Halperin%2C+E">Eran Halperin</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+W">Wei Wang</a> </p> <p class="abstract mathjax"> <span 
class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.17326v1-abstract-short" style="display: inline;"> Clinical diagnosis prediction models, when provided with a patient&#39;s medical history, aim to detect potential diseases early, facilitating timely intervention and improving prognostic outcomes. However, the inherent scarcity of patient data and large disease candidate space often pose challenges in developing satisfactory models for this intricate task. The exploration of leveraging Large Language&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.17326v1-abstract-full').style.display = 'inline'; document.getElementById('2501.17326v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.17326v1-abstract-full" style="display: none;"> Clinical diagnosis prediction models, when provided with a patient&#39;s medical history, aim to detect potential diseases early, facilitating timely intervention and improving prognostic outcomes. However, the inherent scarcity of patient data and large disease candidate space often pose challenges in developing satisfactory models for this intricate task. The exploration of leveraging Large Language Models (LLMs) for encapsulating clinical decision processes has been limited. We introduce MERA, a clinical diagnosis prediction model that bridges pertaining natural language knowledge with medical practice. We apply hierarchical contrastive learning on a disease candidate ranking list to alleviate the large decision space issue. With concept memorization through fine-tuning, we bridge the natural language clinical knowledge with medical codes. 
Experimental results on MIMIC-III and IV datasets show that MERA achieves the state-of-the-art diagnosis prediction performance and dramatically elevates the diagnosis prediction capabilities of generative LMs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.17326v1-abstract-full').style.display = 'none'; document.getElementById('2501.17326v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear at AAAI 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.15455">arXiv:2501.15455</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.15455">pdf</a>, <a href="https://arxiv.org/format/2501.15455">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> CD-Lamba: Boosting Remote Sensing Change Detection via a Cross-Temporal Locally Adaptive State Space Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wu%2C+Z">Zhenkai Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+X">Xiaowen Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Lian%2C+R">Rongrong Lian</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+K">Kai Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+M">Mengting Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+W">Wei Zhang</a>, <a 
href="/search/cs?searchtype=author&amp;query=Song%2C+S">Siyang Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.15455v1-abstract-short" style="display: inline;"> Mamba, with its advantages of global perception and linear complexity, has been widely applied to identify changes of the target regions within the remote sensing (RS) images captured under complex scenarios and varied conditions. However, existing remote sensing change detection (RSCD) approaches based on Mamba frequently struggle to effectively perceive the inherent locality of change regions as&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15455v1-abstract-full').style.display = 'inline'; document.getElementById('2501.15455v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.15455v1-abstract-full" style="display: none;"> Mamba, with its advantages of global perception and linear complexity, has been widely applied to identify changes of the target regions within the remote sensing (RS) images captured under complex scenarios and varied conditions. However, existing remote sensing change detection (RSCD) approaches based on Mamba frequently struggle to effectively perceive the inherent locality of change regions as they direct flatten and scan RS images (i.e., the features of the same region of changes are not distributed continuously within the sequence but are mixed with features from other regions throughout the sequence). In this paper, we propose a novel locally adaptive SSM-based approach, termed CD-Lamba, which effectively enhances the locality of change detection while maintaining global perception. 
Specifically, our CD-Lamba includes a Locally Adaptive State-Space Scan (LASS) strategy for locality enhancement, a Cross-Temporal State-Space Scan (CTSS) strategy for bi-temporal feature fusion, and a Window Shifting and Perception (WSP) mechanism to enhance interactions across segmented windows. These strategies are integrated into a multi-scale Cross-Temporal Locally Adaptive State-Space Scan (CT-LASS) module to effectively highlight changes and refine changes&#39; representations feature generation. CD-Lamba significantly enhances local-global spatio-temporal interactions in bi-temporal images, offering improved performance in RSCD tasks. Extensive experimental results show that CD-Lamba achieves state-of-the-art performance on four benchmark datasets with a satisfactory efficiency-accuracy trade-off. Our code is publicly available at https://github.com/xwmaxwma/rschange. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15455v1-abstract-full').style.display = 'none'; document.getElementById('2501.15455v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.14817">arXiv:2501.14817</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.14817">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> </div> </div> <p class="title is-5 mathjax"> A Cutting Mechanics-based Machine Learning Modeling Method to Discover Governing Equations of Machining Dynamics </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Ren%2C+A">Alisa Ren</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+M">Mason Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+J">Jiajie Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Karandikar%2C+J">Jaydeep Karandikar</a>, <a href="/search/cs?searchtype=author&amp;query=Tyler%2C+C">Chris Tyler</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+T">Tony Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Schmitz%2C+T">Tony Schmitz</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.14817v1-abstract-short" style="display: inline;"> This paper proposes a cutting mechanics-based machine learning (CMML) modeling method to discover governing equations of machining dynamics. The main idea of CMML design is to integrate existing physics in cutting mechanics and unknown physics in data to achieve automated model discovery, with the potential to advance machining modeling. 
Based on existing physics in cutting mechanics, CMML first e&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14817v1-abstract-full').style.display = 'inline'; document.getElementById('2501.14817v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.14817v1-abstract-full" style="display: none;"> This paper proposes a cutting mechanics-based machine learning (CMML) modeling method to discover governing equations of machining dynamics. The main idea of CMML design is to integrate existing physics in cutting mechanics and unknown physics in data to achieve automated model discovery, with the potential to advance machining modeling. Based on existing physics in cutting mechanics, CMML first establishes a general modeling structure governing machining dynamics, that is represented by a set of unknown differential algebraic equations. CMML can therefore achieve data-driven discovery of these unknown equations through effective cutting mechanics-based nonlinear learning function space design and discrete optimization-based learning algorithm. Experimentally verified time domain simulation of milling is used to validate the proposed modeling method. Numerical results show CMML can discover the exact milling dynamics models with process damping and edge force from noisy data. This indicates that CMML has the potential to be used for advancing machining modeling in practice with the development of effective metrology systems. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14817v1-abstract-full').style.display = 'none'; document.getElementById('2501.14817v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.14249">arXiv:2501.14249</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.14249">pdf</a>, <a href="https://arxiv.org/format/2501.14249">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Humanity&#39;s Last Exam </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Phan%2C+L">Long Phan</a>, <a href="/search/cs?searchtype=author&amp;query=Gatti%2C+A">Alice Gatti</a>, <a href="/search/cs?searchtype=author&amp;query=Han%2C+Z">Ziwen Han</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+N">Nathaniel Li</a>, <a href="/search/cs?searchtype=author&amp;query=Hu%2C+J">Josephina Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+H">Hugh Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+C+B+C">Chen Bo Calvin Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Shaaban%2C+M">Mohamed Shaaban</a>, <a href="/search/cs?searchtype=author&amp;query=Ling%2C+J">John Ling</a>, <a 
href="/search/cs?searchtype=author&amp;query=Shi%2C+S">Sean Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Choi%2C+M">Michael Choi</a>, <a href="/search/cs?searchtype=author&amp;query=Agrawal%2C+A">Anish Agrawal</a>, <a href="/search/cs?searchtype=author&amp;query=Chopra%2C+A">Arnav Chopra</a>, <a href="/search/cs?searchtype=author&amp;query=Khoja%2C+A">Adam Khoja</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+R">Ryan Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Ren%2C+R">Richard Ren</a>, <a href="/search/cs?searchtype=author&amp;query=Hausenloy%2C+J">Jason Hausenloy</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+O">Oliver Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Mazeika%2C+M">Mantas Mazeika</a>, <a href="/search/cs?searchtype=author&amp;query=Nguyen%2C+T">Tung Nguyen</a>, <a href="/search/cs?searchtype=author&amp;query=Anderson%2C+D">Daron Anderson</a>, <a href="/search/cs?searchtype=author&amp;query=Shah%2C+I+A">Imad Ali Shah</a>, <a href="/search/cs?searchtype=author&amp;query=Doroshenko%2C+M">Mikhail Doroshenko</a>, <a href="/search/cs?searchtype=author&amp;query=Stokes%2C+A+C">Alun Cennyth Stokes</a>, <a href="/search/cs?searchtype=author&amp;query=Mahmood%2C+M">Mobeen Mahmood</a> , et al. (709 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.14249v5-abstract-short" style="display: inline;"> Benchmarks are important tools for tracking the rapid advancements in large language model (LLM) capabilities. However, benchmarks are not keeping pace in difficulty: LLMs now achieve over 90\% accuracy on popular benchmarks like MMLU, limiting informed measurement of state-of-the-art LLM capabilities. 
In response, we introduce Humanity&#39;s Last Exam (HLE), a multi-modal benchmark at the frontier of&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14249v5-abstract-full').style.display = 'inline'; document.getElementById('2501.14249v5-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.14249v5-abstract-full" style="display: none;"> Benchmarks are important tools for tracking the rapid advancements in large language model (LLM) capabilities. However, benchmarks are not keeping pace in difficulty: LLMs now achieve over 90\% accuracy on popular benchmarks like MMLU, limiting informed measurement of state-of-the-art LLM capabilities. In response, we introduce Humanity&#39;s Last Exam (HLE), a multi-modal benchmark at the frontier of human knowledge, designed to be the final closed-ended academic benchmark of its kind with broad subject coverage. HLE consists of 2,700 questions across dozens of subjects, including mathematics, humanities, and the natural sciences. HLE is developed globally by subject-matter experts and consists of multiple-choice and short-answer questions suitable for automated grading. Each question has a known solution that is unambiguous and easily verifiable, but cannot be quickly answered via internet retrieval. State-of-the-art LLMs demonstrate low accuracy and calibration on HLE, highlighting a significant gap between current LLM capabilities and the expert human frontier on closed-ended academic questions. To inform research and policymaking upon a clear understanding of model capabilities, we publicly release HLE at https://lastexam.ai. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14249v5-abstract-full').style.display = 'none'; document.getElementById('2501.14249v5-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">27 pages, 6 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.12285">arXiv:2501.12285</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.12285">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Risk Management">q-fin.RM</span> </div> </div> <p class="title is-5 mathjax"> Implementation of an Asymmetric Adjusted Activation Function for Class Imbalance Credit Scoring </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+X">Xia Li</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+H">Hanghang Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Tao%2C+K">Kunpeng Tao</a>, <a href="/search/cs?searchtype=author&amp;query=Mao%2C+M">Mao Mao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark 
mathjax" id="2501.12285v1-abstract-short" style="display: inline;"> Credit scoring is a systematic approach to evaluate a borrower&#39;s probability of default (PD) on a bank loan. The data associated with such scenarios are characteristically imbalanced, complicating binary classification owing to the often-underestimated cost of misclassification during the classifier&#39;s learning process. Considering the high imbalance ratio (IR) of these datasets, we introduce an in&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.12285v1-abstract-full').style.display = 'inline'; document.getElementById('2501.12285v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.12285v1-abstract-full" style="display: none;"> Credit scoring is a systematic approach to evaluate a borrower&#39;s probability of default (PD) on a bank loan. The data associated with such scenarios are characteristically imbalanced, complicating binary classification owing to the often-underestimated cost of misclassification during the classifier&#39;s learning process. Considering the high imbalance ratio (IR) of these datasets, we introduce an innovative yet straightforward optimized activation function by incorporating an IR-dependent asymmetric adjusted factor embedded Sigmoid activation function (ASIG). The embedding of ASIG makes the sensitive margin of the Sigmoid function auto-adjustable, depending on the imbalance nature of the datasets distributed, thereby giving the activation function an asymmetric characteristic that prevents the underrepresentation of the minority class (positive samples) during the classifier&#39;s learning process. The experimental results show that the ASIG-embedded-classifier outperforms traditional classifiers on datasets across wide-ranging IRs in the downstream credit-scoring task. 
The algorithm also shows robustness and stability, even when the IR is ultra-high. Therefore, the algorithm provides a competitive alternative in the financial industry, especially in credit scoring, possessing the ability to effectively process highly imbalanced distribution data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.12285v1-abstract-full').style.display = 'none'; document.getElementById('2501.12285v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.10677">arXiv:2501.10677</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.10677">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Risk Management">q-fin.RM</span> </div> </div> <p class="title is-5 mathjax"> Class-Imbalanced-Aware Adaptive Dataset Distillation for Scalable Pretrained Model on Credit Scoring </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+X">Xia Li</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+H">Hanghang Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+X">Xiao Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+H">Hong Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Mao%2C+M">Mao Mao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.10677v2-abstract-short" style="display: inline;"> The advent of artificial intelligence has significantly enhanced credit scoring technologies. Despite the remarkable efficacy of advanced deep learning models, mainstream adoption continues to favor tree-structured models due to their robust predictive performance on tabular data. Although pretrained models have seen considerable development, their application within the financial realm predominan&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.10677v2-abstract-full').style.display = 'inline'; document.getElementById('2501.10677v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.10677v2-abstract-full" style="display: none;"> The advent of artificial intelligence has significantly enhanced credit scoring technologies. Despite the remarkable efficacy of advanced deep learning models, mainstream adoption continues to favor tree-structured models due to their robust predictive performance on tabular data. Although pretrained models have seen considerable development, their application within the financial realm predominantly revolves around question-answering tasks and the use of such models for tabular-structured credit scoring datasets remains largely unexplored. Tabular-oriented large models, such as TabPFN, have made the application of large models in credit scoring feasible, albeit they can only process limited sample sizes. This paper provides a novel framework to combine a tabular-tailored dataset distillation technique with the pretrained model, which empowers the scalability for TabPFN. Furthermore, though class-imbalanced distributions are common in financial datasets, their influence during dataset distillation has not been explored. 
We thus integrate the imbalance-aware techniques during dataset distillation, resulting in improved performance in financial datasets (e.g., a 2.5% enhancement in AUC). This study presents a novel framework for scaling up the application of large pretrained models on financial tabular datasets and offers a comparative analysis of the influence of class imbalance on the dataset distillation process. We believe this approach can broaden the applications and downstream tasks of large models in the financial domain. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.10677v2-abstract-full').style.display = 'none'; document.getElementById('2501.10677v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.09426">arXiv:2501.09426</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.09426">pdf</a>, <a href="https://arxiv.org/format/2501.09426">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> AutoCBT: An Autonomous Multi-agent Framework for Cognitive Behavioral Therapy in Psychological Counseling </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Xu%2C+A">Ancheng Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+D">Di Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+R">Renhao Li</a>, <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+J">Jingwei Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Tan%2C+M">Minghuan Tan</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+M">Min Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Qiu%2C+W">Wanxin Qiu</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+M">Mingchen Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+H">Haihong Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+B">Bingyu Li</a>, <a href="/search/cs?searchtype=author&amp;query=Sha%2C+F">Feng Sha</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+C">Chengming Li</a>, <a href="/search/cs?searchtype=author&amp;query=Hu%2C+X">Xiping Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Qu%2C+Q">Qiang Qu</a>, <a href="/search/cs?searchtype=author&amp;query=Wong%2C+D+F">Derek F. 
Wong</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+R">Ruifeng Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.09426v1-abstract-short" style="display: inline;"> Traditional in-person psychological counseling remains primarily niche, often chosen by individuals with psychological issues, while online automated counseling offers a potential solution for those hesitant to seek help due to feelings of shame. Cognitive Behavioral Therapy (CBT) is an essential and widely used approach in psychological counseling. The advent of large language models (LLMs) and a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.09426v1-abstract-full').style.display = 'inline'; document.getElementById('2501.09426v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.09426v1-abstract-full" style="display: none;"> Traditional in-person psychological counseling remains primarily niche, often chosen by individuals with psychological issues, while online automated counseling offers a potential solution for those hesitant to seek help due to feelings of shame. Cognitive Behavioral Therapy (CBT) is an essential and widely used approach in psychological counseling. The advent of large language models (LLMs) and agent technology enables automatic CBT diagnosis and treatment. However, current LLM-based CBT systems use agents with a fixed structure, limiting their self-optimization capabilities, or providing hollow, unhelpful suggestions due to redundant response patterns. In this work, we utilize Quora-like and YiXinLi single-round consultation models to build a general agent framework that generates high-quality responses for single-turn psychological consultation scenarios. 
We use a bilingual dataset to evaluate the quality of single-response consultations generated by each framework. Then, we incorporate dynamic routing and supervisory mechanisms inspired by real psychological counseling to construct a CBT-oriented autonomous multi-agent framework, demonstrating its general applicability. Experimental results indicate that AutoCBT can provide higher-quality automated psychological counseling services. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.09426v1-abstract-full').style.display = 'none'; document.getElementById('2501.09426v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.08528">arXiv:2501.08528</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.08528">pdf</a>, <a href="https://arxiv.org/format/2501.08528">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/IJCNN54540.2023.10191785">10.1109/IJCNN54540.2023.10191785 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Dynamic Portfolio Optimization via Augmented DDPG with Quantum Price Levels-Based Trading Strategy </p> <p 
class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lin%2C+R">Runsheng Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Xing%2C+Z">Zihan Xing</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+M">Mingze Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+R+S+T">Raymond S. T. Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.08528v1-abstract-short" style="display: inline;"> With the development of deep learning, Dynamic Portfolio Optimization (DPO) problem has received a lot of attention in recent years, not only in the field of finance but also in the field of deep learning. Some advanced research in recent years has proposed the application of Deep Reinforcement Learning (DRL) to the DPO problem, which demonstrated to be more advantageous than supervised learning i&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.08528v1-abstract-full').style.display = 'inline'; document.getElementById('2501.08528v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.08528v1-abstract-full" style="display: none;"> With the development of deep learning, Dynamic Portfolio Optimization (DPO) problem has received a lot of attention in recent years, not only in the field of finance but also in the field of deep learning. Some advanced research in recent years has proposed the application of Deep Reinforcement Learning (DRL) to the DPO problem, which demonstrated to be more advantageous than supervised learning in solving the DPO problem. However, there are still certain unsolved issues: 1) DRL algorithms usually have the problems of slow learning speed and high sample complexity, which is especially problematic when dealing with complex financial data. 
2) researchers use DRL simply for the purpose of obtaining high returns, but pay little attention to the problem of risk control and trading strategy, which will affect the stability of model returns. In order to address these issues, in this study we revamped the intrinsic structure of the model based on the Deep Deterministic Policy Gradient (DDPG) and proposed the Augmented DDPG model. Besides, we also proposed an innovative risk control strategy based on Quantum Price Levels (QPLs) derived from Quantum Finance Theory (QFT). Our experimental results revealed that our model has better profitability as well as risk control ability with less sample complexity in the DPO problem compared to the baseline models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.08528v1-abstract-full').style.display = 'none'; document.getElementById('2501.08528v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Proceedings of the 2023 International Joint Conference on Neural Networks (IJCNN), pp. 
1-8, 2023 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.06706">arXiv:2501.06706</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.06706">pdf</a>, <a href="https://arxiv.org/format/2501.06706">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> AIOpsLab: A Holistic Framework to Evaluate AI Agents for Enabling Autonomous Clouds </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Y">Yinfang Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Shetty%2C+M">Manish Shetty</a>, <a href="/search/cs?searchtype=author&amp;query=Somashekar%2C+G">Gagan Somashekar</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+M">Minghua Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Simmhan%2C+Y">Yogesh Simmhan</a>, <a href="/search/cs?searchtype=author&amp;query=Mace%2C+J">Jonathan Mace</a>, <a href="/search/cs?searchtype=author&amp;query=Bansal%2C+C">Chetan Bansal</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+R">Rujia Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Rajmohan%2C+S">Saravan Rajmohan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.06706v1-abstract-short" style="display: inline;"> AI for IT Operations (AIOps) aims to automate complex 
operational tasks, such as fault localization and root cause analysis, to reduce human workload and minimize customer impact. While traditional DevOps tools and AIOps algorithms often focus on addressing isolated operational tasks, recent advances in Large Language Models (LLMs) and AI agents are revolutionizing AIOps by enabling end-to-end and&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.06706v1-abstract-full').style.display = 'inline'; document.getElementById('2501.06706v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.06706v1-abstract-full" style="display: none;"> AI for IT Operations (AIOps) aims to automate complex operational tasks, such as fault localization and root cause analysis, to reduce human workload and minimize customer impact. While traditional DevOps tools and AIOps algorithms often focus on addressing isolated operational tasks, recent advances in Large Language Models (LLMs) and AI agents are revolutionizing AIOps by enabling end-to-end and multitask automation. This paper envisions a future where AI agents autonomously manage operational tasks throughout the entire incident lifecycle, leading to self-healing cloud systems, a paradigm we term AgentOps. Realizing this vision requires a comprehensive framework to guide the design, development, and evaluation of these agents. To this end, we present AIOPSLAB, a framework that not only deploys microservice cloud environments, injects faults, generates workloads, and exports telemetry data but also orchestrates these components and provides interfaces for interacting with and evaluating agents. We discuss the key requirements for such a holistic framework and demonstrate how AIOPSLAB can facilitate the evaluation of next-generation AIOps agents. 
Through evaluations of state-of-the-art LLM agents within the benchmark created by AIOPSLAB, we provide insights into their capabilities and limitations in handling complex operational tasks in cloud environments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.06706v1-abstract-full').style.display = 'none'; document.getElementById('2501.06706v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.05793">arXiv:2501.05793</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.05793">pdf</a>, <a href="https://arxiv.org/format/2501.05793">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> ActMiner: Applying Causality Tracking and Increment Aligning for Graph-based Cyber Threat Hunting </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Ma%2C+M">Mingjun Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+T">Tiantian Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+T">Tieming Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+S">Shuang Li</a>, <a href="/search/cs?searchtype=author&amp;query=Ying%2C+J">Jie Ying</a>, <a href="/search/cs?searchtype=author&amp;query=Xiong%2C+C">Chunlin Xiong</a>, <a href="/search/cs?searchtype=author&amp;query=Lv%2C+M">Mingqi Lv</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Y">Yan Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.05793v1-abstract-short" style="display: inline;"> To defend against Advanced Persistent Threats on the endpoint, threat hunting employs security knowledge such as cyber threat intelligence to continuously analyze system audit logs through retrospective scanning, querying, or pattern matching, aiming to uncover attack patterns/graphs that traditional detection methods (e.g., recognition for Point of Interest) fail to capture. However, existing thr&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.05793v1-abstract-full').style.display = 'inline'; document.getElementById('2501.05793v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.05793v1-abstract-full" style="display: none;"> To defend against Advanced Persistent Threats on the endpoint, threat hunting employs security knowledge such as cyber threat intelligence to continuously analyze system audit logs through retrospective scanning, querying, or pattern matching, aiming to uncover attack patterns/graphs that traditional detection methods (e.g., recognition for Point of Interest) fail to capture. However, existing threat hunting systems based on provenance graphs face challenges of high false negatives, high false positives, and low efficiency when confronted with diverse attack tactics and voluminous audit logs. To address these issues, we propose a system called Actminer, which constructs query graphs from descriptive relationships in cyber threat intelligence reports for precise threat hunting (i.e., graph alignment) on provenance graphs. First, we present a heuristic search strategy based on equivalent semantic transfer to reduce false negatives. Second, we establish a filtering mechanism based on causal relationships of attack behaviors to mitigate false positives. 
Finally, we design a tree structure to incrementally update the alignment results, significantly improving hunting efficiency. Evaluation on the DARPA Engagement dataset demonstrates that compared to the SOTA POIROT, Actminer reduces false positives by 39.1%, eliminates all false negatives, and effectively counters adversarial attacks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.05793v1-abstract-full').style.display = 'none'; document.getElementById('2501.05793v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.00508">arXiv:2501.00508</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.00508">pdf</a>, <a href="https://arxiv.org/ps/2501.00508">ps</a>, <a href="https://arxiv.org/format/2501.00508">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Active Learning of General Halfspaces: Label Queries vs Membership Queries </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Diakonikolas%2C+I">Ilias Diakonikolas</a>, <a href="/search/cs?searchtype=author&amp;query=Kane%2C+D+M">Daniel M. 
Kane</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+M">Mingchen Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.00508v1-abstract-short" style="display: inline;"> We study the problem of learning general (i.e., not necessarily homogeneous) halfspaces under the Gaussian distribution on $R^d$ in the presence of some form of query access. In the classical pool-based active learning model, where the algorithm is allowed to make adaptive label queries to previously sampled points, we establish a strong information-theoretic lower bound ruling out non-trivial imp&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.00508v1-abstract-full').style.display = 'inline'; document.getElementById('2501.00508v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.00508v1-abstract-full" style="display: none;"> We study the problem of learning general (i.e., not necessarily homogeneous) halfspaces under the Gaussian distribution on $R^d$ in the presence of some form of query access. In the classical pool-based active learning model, where the algorithm is allowed to make adaptive label queries to previously sampled points, we establish a strong information-theoretic lower bound ruling out non-trivial improvements over the passive setting. Specifically, we show that any active learner requires label complexity of $\tilde{Ω}(d/(\log(m)ε))$, where $m$ is the number of unlabeled examples. Specifically, to beat the passive label complexity of $\tilde{O} (d/ε)$, an active learner requires a pool of $2^{poly(d)}$ unlabeled samples. On the positive side, we show that this lower bound can be circumvented with membership query access, even in the agnostic model. 
Specifically, we give a computationally efficient learner with query complexity of $\tilde{O}(\min\{1/p, 1/ε\} + d\cdot polylog(1/ε))$ achieving error guarantee of $O(opt)+ε$. Here $p \in [0, 1/2]$ is the bias and $opt$ is the 0-1 loss of the optimal halfspace. As a corollary, we obtain a strong separation between the active and membership query models. Taken together, our results characterize the complexity of learning general halfspaces under Gaussian marginals in these models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.00508v1-abstract-full').style.display = 'none'; document.getElementById('2501.00508v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by NeurIPS 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.17744">arXiv:2412.17744</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.17744">pdf</a>, <a href="https://arxiv.org/format/2412.17744">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> RepoTransBench: A Real-World Benchmark for Repository-Level Code Translation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a 
href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Yanli Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Yanlin Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+S">Suiquan Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+D">Daya Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+J">Jiachi Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Grundy%2C+J">John Grundy</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+X">Xilin Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+Y">Yuchi Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Mao%2C+M">Mingzhi Mao</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+H">Hongyu Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+Z">Zibin Zheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.17744v1-abstract-short" style="display: inline;"> Repository-level code translation refers to translating an entire code repository from one programming language to another while preserving the functionality of the source repository. Many benchmarks have been proposed to evaluate the performance of such code translators. However, previous benchmarks mostly provide fine-grained samples, focusing at either code snippet, function, or file-level code&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.17744v1-abstract-full').style.display = 'inline'; document.getElementById('2412.17744v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.17744v1-abstract-full" style="display: none;"> Repository-level code translation refers to translating an entire code repository from one programming language to another while preserving the functionality of the source repository. 
Many benchmarks have been proposed to evaluate the performance of such code translators. However, previous benchmarks mostly provide fine-grained samples, focusing at either code snippet, function, or file-level code translation. Such benchmarks do not accurately reflect real-world demands, where entire repositories often need to be translated, involving longer code length and more complex functionalities. To address this gap, we propose a new benchmark, named RepoTransBench, which is a real-world repository-level code translation benchmark with an automatically executable test suite. We conduct experiments on RepoTransBench to evaluate the translation performance of 11 advanced LLMs. We find that the Success@1 score (test success in one attempt) of the best-performing LLM is only 7.33%. To further explore the potential of LLMs for repository-level code translation, we provide LLMs with error-related feedback to perform iterative debugging and observe an average 7.09% improvement on Success@1. However, even with this improvement, the Success@1 score of the best-performing LLM is only 21%, which may not meet the need for reliable automatic repository-level code translation. Finally, we conduct a detailed error analysis and highlight current LLMs&#39; deficiencies in repository-level code translation, which could provide a reference for further improvements. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.17744v1-abstract-full').style.display = 'none'; document.getElementById('2412.17744v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.17247">arXiv:2412.17247</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.17247">pdf</a>, <a href="https://arxiv.org/format/2412.17247">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> STeInFormer: Spatial-Temporal Interaction Transformer Architecture for Remote Sensing Change Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Ma%2C+X">Xiaowen Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+Z">Zhenkai Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+M">Mengting Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+M">Mengjiao Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+F">Fan Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Du%2C+Z">Zhenhong Du</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+W">Wei Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.17247v1-abstract-short" style="display: inline;"> Convolutional neural networks and attention mechanisms have greatly benefited remote sensing change detection (RSCD) because of their outstanding discriminative ability. Existent RSCD methods often follow a paradigm of using a non-interactive Siamese neural network for multi-temporal feature extraction and change detection heads for feature fusion and change representation. 
However, this paradigm&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.17247v1-abstract-full').style.display = 'inline'; document.getElementById('2412.17247v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.17247v1-abstract-full" style="display: none;"> Convolutional neural networks and attention mechanisms have greatly benefited remote sensing change detection (RSCD) because of their outstanding discriminative ability. Existent RSCD methods often follow a paradigm of using a non-interactive Siamese neural network for multi-temporal feature extraction and change detection heads for feature fusion and change representation. However, this paradigm lacks the contemplation of the characteristics of RSCD in temporal and spatial dimensions, and causes the drawback on spatial-temporal interaction that hinders high-quality feature extraction. To address this problem, we present STeInFormer, a spatial-temporal interaction Transformer architecture for multi-temporal feature extraction, which is the first general backbone network specifically designed for RSCD. In addition, we propose a parameter-free multi-frequency token mixer to integrate frequency-domain features that provide spectral information for RSCD. Experimental results on three datasets validate the effectiveness of the proposed method, which can outperform the state-of-the-art methods and achieve the most satisfactory efficiency-accuracy trade-off. Code is available at https://github.com/xwmaxwma/rschange. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.17247v1-abstract-full').style.display = 'none'; document.getElementById('2412.17247v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">JSTARS 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.16844">arXiv:2412.16844</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.16844">pdf</a>, <a href="https://arxiv.org/format/2412.16844">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Sim911: Towards Effective and Equitable 9-1-1 Dispatcher Training with an LLM-Enabled Simulation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Z">Zirong Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Chason%2C+E">Elizabeth Chason</a>, <a href="/search/cs?searchtype=author&amp;query=Mladenovski%2C+N">Noah Mladenovski</a>, <a href="/search/cs?searchtype=author&amp;query=Wilson%2C+E">Erin Wilson</a>, <a href="/search/cs?searchtype=author&amp;query=Mullen%2C+K">Kristin Mullen</a>, <a href="/search/cs?searchtype=author&amp;query=Martini%2C+S">Stephen Martini</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+M">Meiyi Ma</a> </p> <p class="abstract mathjax"> 
<span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.16844v3-abstract-short" style="display: inline;"> Emergency response services are vital for enhancing public safety by safeguarding the environment, property, and human lives. As frontline members of these services, 9-1-1 dispatchers have a direct impact on response times and the overall effectiveness of emergency operations. However, traditional dispatcher training methods, which rely on role-playing by experienced personnel, are labor-intensive&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.16844v3-abstract-full').style.display = 'inline'; document.getElementById('2412.16844v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.16844v3-abstract-full" style="display: none;"> Emergency response services are vital for enhancing public safety by safeguarding the environment, property, and human lives. As frontline members of these services, 9-1-1 dispatchers have a direct impact on response times and the overall effectiveness of emergency operations. However, traditional dispatcher training methods, which rely on role-playing by experienced personnel, are labor-intensive, time-consuming, and often neglect the specific needs of underserved communities. To address these challenges, we introduce Sim911, the first training simulation for 9-1-1 dispatchers powered by Large Language Models (LLMs). 
Sim911 enhances training through three key technical innovations: (1) knowledge construction, which utilizes archived 9-1-1 call data to generate simulations that closely mirror real-world scenarios; (2) context-aware controlled generation, which employs dynamic prompts and vector bases to ensure that LLM behavior aligns with training objectives; and (3) validation with looped correction, which filters out low-quality responses and refines the system performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.16844v3-abstract-full').style.display = 'none'; document.getElementById('2412.16844v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.16270">arXiv:2412.16270</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.16270">pdf</a>, <a href="https://arxiv.org/format/2412.16270">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> MetaScientist: A Human-AI Synergistic Framework for Automated Mechanical Metamaterial Design </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Qi%2C+J">Jingyuan Qi</a>, <a href="/search/cs?searchtype=author&amp;query=Jia%2C+Z">Zian Jia</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+M">Minqian Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhan%2C+W">Wangzhi Zhan</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+J">Junkai Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Wen%2C+X">Xiaofei Wen</a>, <a href="/search/cs?searchtype=author&amp;query=Gan%2C+J">Jingru Gan</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+J">Jianpeng Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Q">Qin Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+M+D">Mingyu Derek Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+B">Bangzheng Li</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+H">Haohui Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Kulkarni%2C+A">Adithya Kulkarni</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+M">Muhao Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+D">Dawei Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+L">Ling Li</a>, <a 
href="/search/cs?searchtype=author&amp;query=Wang%2C+W">Wei Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+L">Lifu Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.16270v1-abstract-short" style="display: inline;"> The discovery of novel mechanical metamaterials, whose properties are dominated by their engineered structures rather than chemical composition, is a knowledge-intensive and resource-demanding process. To accelerate the design of novel metamaterials, we present MetaScientist, a human-in-the-loop system that integrates advanced AI capabilities with expert oversight with two primary phases: (1) hypo&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.16270v1-abstract-full').style.display = 'inline'; document.getElementById('2412.16270v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.16270v1-abstract-full" style="display: none;"> The discovery of novel mechanical metamaterials, whose properties are dominated by their engineered structures rather than chemical composition, is a knowledge-intensive and resource-demanding process. To accelerate the design of novel metamaterials, we present MetaScientist, a human-in-the-loop system that integrates advanced AI capabilities with expert oversight with two primary phases: (1) hypothesis generation, where the system performs complex reasoning to generate novel and scientifically sound hypotheses, supported with domain-specific foundation models and inductive biases retrieved from existing literature; (2) 3D structure synthesis, where a 3D structure is synthesized with a novel 3D diffusion model based on the textual hypothesis and refined it with a LLM-based refinement model to achieve better structure properties. 
At each phase, domain experts iteratively validate the system outputs, and provide feedback and supplementary materials to ensure the alignment of the outputs with scientific principles and human preferences. Through extensive evaluation from human scientists, MetaScientist is able to deliver novel and valid mechanical metamaterial designs that have the potential to be highly impactful in the metamaterial field. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.16270v1-abstract-full').style.display = 'none'; document.getElementById('2412.16270v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.13365">arXiv:2412.13365</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.13365">pdf</a>, <a href="https://arxiv.org/format/2412.13365">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Quantitative Predictive Monitoring and Control for Safe Human-Machine Interaction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Dong%2C+S">Shuyang Dong</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+M">Meiyi Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Lamp%2C+J">Josephine Lamp</a>, <a 
href="/search/cs?searchtype=author&amp;query=Elbaum%2C+S">Sebastian Elbaum</a>, <a href="/search/cs?searchtype=author&amp;query=Dwyer%2C+M+B">Matthew B. Dwyer</a>, <a href="/search/cs?searchtype=author&amp;query=Feng%2C+L">Lu Feng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.13365v1-abstract-short" style="display: inline;"> There is a growing trend toward AI systems interacting with humans to revolutionize a range of application domains such as healthcare and transportation. However, unsafe human-machine interaction can lead to catastrophic failures. We propose a novel approach that predicts future states by accounting for the uncertainty of human interaction, monitors whether predictions satisfy or violate safety re&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.13365v1-abstract-full').style.display = 'inline'; document.getElementById('2412.13365v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.13365v1-abstract-full" style="display: none;"> There is a growing trend toward AI systems interacting with humans to revolutionize a range of application domains such as healthcare and transportation. However, unsafe human-machine interaction can lead to catastrophic failures. We propose a novel approach that predicts future states by accounting for the uncertainty of human interaction, monitors whether predictions satisfy or violate safety requirements, and adapts control actions based on the predictive monitoring results. Specifically, we develop a new quantitative predictive monitor based on Signal Temporal Logic with Uncertainty (STL-U) to compute a robustness degree interval, which indicates the extent to which a sequence of uncertain predictions satisfies or violates an STL-U requirement. 
We also develop a new loss function to guide the uncertainty calibration of Bayesian deep learning and a new adaptive control method, both of which leverage STL-U quantitative predictive monitoring results. We apply the proposed approach to two case studies: Type 1 Diabetes management and semi-autonomous driving. Experiments show that the proposed approach improves safety and effectiveness in both case studies. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.13365v1-abstract-full').style.display = 'none'; document.getElementById('2412.13365v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.07998">arXiv:2412.07998</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.07998">pdf</a>, <a href="https://arxiv.org/ps/2412.07998">ps</a>, <a href="https://arxiv.org/format/2412.07998">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> RALI@TREC iKAT 2024: Achieving Personalization via Retrieval Fusion in Conversational Search </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Hui%2C+Y">Yuchen Hui</a>, <a href="/search/cs?searchtype=author&amp;query=Mo%2C+F">Fengran Mo</a>, <a href="/search/cs?searchtype=author&amp;query=Mao%2C+M">Milan Mao</a>, <a href="/search/cs?searchtype=author&amp;query=Nie%2C+J">Jian-Yun Nie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span 
class="abstract-short has-text-grey-dark mathjax" id="2412.07998v1-abstract-short" style="display: inline;"> The Recherche Appliquee en Linguistique Informatique (RALI) team participated in the 2024 TREC Interactive Knowledge Assistance (iKAT) Track. In personalized conversational search, effectively capturing a user&#39;s complex search intent requires incorporating both contextual information and key elements from the user profile into query reformulation. The user profile often contains many relevant piec&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.07998v1-abstract-full').style.display = 'inline'; document.getElementById('2412.07998v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.07998v1-abstract-full" style="display: none;"> The Recherche Appliquee en Linguistique Informatique (RALI) team participated in the 2024 TREC Interactive Knowledge Assistance (iKAT) Track. In personalized conversational search, effectively capturing a user&#39;s complex search intent requires incorporating both contextual information and key elements from the user profile into query reformulation. The user profile often contains many relevant pieces, and each could potentially complement the user&#39;s information needs. It is difficult to disregard any of them, whereas introducing an excessive number of these pieces risks drifting from the original query and hinders search performance. This is a challenge we denote as over-personalization. To address this, we propose different strategies by fusing ranking lists generated from the queries with different levels of personalization. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.07998v1-abstract-full').style.display = 'none'; document.getElementById('2412.07998v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Work presented at NIST Text Retrieval Conference 2024. https://www.nist.gov/news-events/events/2024/11/trec2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.02886">arXiv:2412.02886</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.02886">pdf</a>, <a href="https://arxiv.org/format/2412.02886">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Patchfinder: Leveraging Visual Language Models for Accurate Information Retrieval using Model Uncertainty </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Colman%2C+R">Roman Colman</a>, <a href="/search/cs?searchtype=author&amp;query=Vu%2C+M">Minh Vu</a>, <a href="/search/cs?searchtype=author&amp;query=Bhattarai%2C+M">Manish Bhattarai</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+M">Martin Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Viswanathan%2C+H">Hari Viswanathan</a>, <a href="/search/cs?searchtype=author&amp;query=O%27Malley%2C+D">Daniel O&#39;Malley</a>, <a href="/search/cs?searchtype=author&amp;query=Santos%2C+J+E">Javier E. 
Santos</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.02886v3-abstract-short" style="display: inline;"> For decades, corporations and governments have relied on scanned documents to record vast amounts of information. However, extracting this information is a slow and tedious process due to the sheer volume and complexity of these records. The rise of Vision Language Models (VLMs) presents a way to efficiently and accurately extract the information out of these documents. The current automated workf&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.02886v3-abstract-full').style.display = 'inline'; document.getElementById('2412.02886v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.02886v3-abstract-full" style="display: none;"> For decades, corporations and governments have relied on scanned documents to record vast amounts of information. However, extracting this information is a slow and tedious process due to the sheer volume and complexity of these records. The rise of Vision Language Models (VLMs) presents a way to efficiently and accurately extract the information out of these documents. The current automated workflow often requires a two-step approach involving the extraction of information using optical character recognition software and subsequent usage of large language models for processing this information. Unfortunately, these methods encounter significant challenges when dealing with noisy scanned documents, often requiring computationally expensive language models to handle high information density effectively. In this study, we propose PatchFinder, an algorithm that builds upon VLMs to improve information extraction. 
First, we devise a confidence-based score, called Patch Confidence, based on the Maximum Softmax Probability of the VLMs&#39; output to measure the model&#39;s confidence in its predictions. Using this metric, PatchFinder determines a suitable patch size, partitions the input document into overlapping patches, and generates confidence-based predictions for the target information. Our experimental results show that PatchFinder, leveraging Phi-3v, a 4.2-billion-parameter VLM, achieves an accuracy of 94% on our dataset of 190 noisy scanned documents, outperforming ChatGPT-4o by 18.5 percentage points. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.02886v3-abstract-full').style.display = 'none'; document.getElementById('2412.02886v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper has been accepted to IEEE/CVF Winter Conference on Applications of Computer Vision (WACV) 2025</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> F.2.2; I.2.7 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.00243">arXiv:2412.00243</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.00243">pdf</a>, <a href="https://arxiv.org/ps/2412.00243">ps</a>, <a href="https://arxiv.org/format/2412.00243">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Realistic Corner Case Generation for Autonomous Vehicles with Multimodal Large Language Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lu%2C+Q">Qiujing Lu</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+M">Meng Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Dai%2C+X">Ximiao Dai</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+X">Xuanhan Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Feng%2C+S">Shuo Feng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.00243v1-abstract-short" style="display: inline;"> To guarantee the safety and reliability of autonomous vehicle (AV) systems, corner cases play a crucial role in exploring the system&#39;s behavior under rare and challenging conditions within simulation 
environments. However, current approaches often fall short in meeting diverse testing needs and struggle to generalize to novel, high-risk scenarios that closely mirror real-world conditions. To tackl&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.00243v1-abstract-full').style.display = 'inline'; document.getElementById('2412.00243v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.00243v1-abstract-full" style="display: none;"> To guarantee the safety and reliability of autonomous vehicle (AV) systems, corner cases play a crucial role in exploring the system&#39;s behavior under rare and challenging conditions within simulation environments. However, current approaches often fall short in meeting diverse testing needs and struggle to generalize to novel, high-risk scenarios that closely mirror real-world conditions. To tackle this challenge, we present AutoScenario, a multimodal Large Language Model (LLM)-based framework for realistic corner case generation. It converts safety-critical real-world data from multiple sources into textual representations, enabling the generalization of key risk factors while leveraging the extensive world knowledge and advanced reasoning capabilities of LLMs. Furthermore, it integrates tools from the Simulation of Urban Mobility (SUMO) and CARLA simulators to simplify and execute the code generated by LLMs. Our experiments demonstrate that AutoScenario can generate realistic and challenging test scenarios, precisely tailored to specific testing requirements or textual descriptions. Additionally, we validated its ability to produce diverse and novel scenarios derived from multimodal real-world data involving risky situations, harnessing the powerful generalization capabilities of LLMs to effectively simulate a wide range of corner cases. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.00243v1-abstract-full').style.display = 'none'; document.getElementById('2412.00243v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.18279">arXiv:2411.18279</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.18279">pdf</a>, <a href="https://arxiv.org/format/2411.18279">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Large Language Model-Brained GUI Agents: A Survey </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+C">Chaoyun Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=He%2C+S">Shilin He</a>, <a href="/search/cs?searchtype=author&amp;query=Qian%2C+J">Jiaxu Qian</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+B">Bowen Li</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+L">Liqun Li</a>, <a href="/search/cs?searchtype=author&amp;query=Qin%2C+S">Si Qin</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+Y">Yu Kang</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+M">Minghua Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+G">Guyue Liu</a>, <a 
href="/search/cs?searchtype=author&amp;query=Lin%2C+Q">Qingwei Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Rajmohan%2C+S">Saravan Rajmohan</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+D">Dongmei Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Q">Qi Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.18279v9-abstract-short" style="display: inline;"> GUIs have long been central to human-computer interaction, providing an intuitive and visually-driven way to access and interact with digital systems. The advent of LLMs, particularly multimodal models, has ushered in a new era of GUI automation. They have demonstrated exceptional capabilities in natural language understanding, code generation, and visual processing. This has paved the way for a n&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.18279v9-abstract-full').style.display = 'inline'; document.getElementById('2411.18279v9-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.18279v9-abstract-full" style="display: none;"> GUIs have long been central to human-computer interaction, providing an intuitive and visually-driven way to access and interact with digital systems. The advent of LLMs, particularly multimodal models, has ushered in a new era of GUI automation. They have demonstrated exceptional capabilities in natural language understanding, code generation, and visual processing. This has paved the way for a new generation of LLM-brained GUI agents capable of interpreting complex GUI elements and autonomously executing actions based on natural language instructions. These agents represent a paradigm shift, enabling users to perform intricate, multi-step tasks through simple conversational commands. 
Their applications span across web navigation, mobile app interactions, and desktop automation, offering a transformative user experience that revolutionizes how individuals interact with software. This emerging field is rapidly advancing, with significant progress in both research and industry. To provide a structured understanding of this trend, this paper presents a comprehensive survey of LLM-brained GUI agents, exploring their historical evolution, core components, and advanced techniques. We address research questions such as existing GUI agent frameworks, the collection and utilization of data for training specialized GUI agents, the development of large action models tailored for GUI tasks, and the evaluation metrics and benchmarks necessary to assess their effectiveness. Additionally, we examine emerging applications powered by these agents. Through a detailed analysis, this survey identifies key research gaps and outlines a roadmap for future advancements in the field. By consolidating foundational knowledge and state-of-the-art developments, this work aims to guide both researchers and practitioners in overcoming challenges and unlocking the full potential of LLM-brained GUI agents. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.18279v9-abstract-full').style.display = 'none'; document.getElementById('2411.18279v9-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">The collection of papers reviewed in this survey will be hosted and regularly updated on the GitHub repository: https://github.com/vyokky/LLM-Brained-GUI-Agents-Survey Additionally, a searchable webpage is available at https://aka.ms/gui-agent for easier access and exploration</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.11581">arXiv:2411.11581</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.11581">pdf</a>, <a href="https://arxiv.org/format/2411.11581">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> OASIS: Open Agent Social Interaction Simulations with One Million Agents </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yang%2C+Z">Ziyi Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Z">Zaibin Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+Z">Zirui Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Jiang%2C+Y">Yuxian Jiang</a>, <a href="/search/cs?searchtype=author&amp;query=Gan%2C+Z">Ziyue Gan</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Z">Zhiyu Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Ling%2C+Z">Zijian Ling</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+J">Jinsong Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+M">Martz Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Dong%2C+B">Bowen Dong</a>, <a href="/search/cs?searchtype=author&amp;query=Gupta%2C+P">Prateek Gupta</a>, <a href="/search/cs?searchtype=author&amp;query=Hu%2C+S">Shuyue Hu</a>, <a 
href="/search/cs?searchtype=author&amp;query=Yin%2C+Z">Zhenfei Yin</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+G">Guohao Li</a>, <a href="/search/cs?searchtype=author&amp;query=Jia%2C+X">Xu Jia</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+L">Lijun Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Ghanem%2C+B">Bernard Ghanem</a>, <a href="/search/cs?searchtype=author&amp;query=Lu%2C+H">Huchuan Lu</a>, <a href="/search/cs?searchtype=author&amp;query=Lu%2C+C">Chaochao Lu</a>, <a href="/search/cs?searchtype=author&amp;query=Ouyang%2C+W">Wanli Ouyang</a>, <a href="/search/cs?searchtype=author&amp;query=Qiao%2C+Y">Yu Qiao</a>, <a href="/search/cs?searchtype=author&amp;query=Torr%2C+P">Philip Torr</a>, <a href="/search/cs?searchtype=author&amp;query=Shao%2C+J">Jing Shao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.11581v4-abstract-short" style="display: inline;"> There has been a growing interest in enhancing rule-based agent-based models (ABMs) for social media platforms (i.e., X, Reddit) with more realistic large language model (LLM) agents, thereby allowing for a more nuanced study of complex systems. As a result, several LLM-based ABMs have been proposed in the past year. 
While they hold promise, each simulator is specifically designed to study a parti&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11581v4-abstract-full').style.display = 'inline'; document.getElementById('2411.11581v4-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.11581v4-abstract-full" style="display: none;"> There has been a growing interest in enhancing rule-based agent-based models (ABMs) for social media platforms (i.e., X, Reddit) with more realistic large language model (LLM) agents, thereby allowing for a more nuanced study of complex systems. As a result, several LLM-based ABMs have been proposed in the past year. While they hold promise, each simulator is specifically designed to study a particular scenario, making it time-consuming and resource-intensive to explore other phenomena using the same ABM. Additionally, these models simulate only a limited number of agents, whereas real-world social media platforms involve millions of users. To this end, we propose OASIS, a generalizable and scalable social media simulator. OASIS is designed based on real-world social media platforms, incorporating dynamically updated environments (i.e., dynamic social networks and post information), diverse action spaces (i.e., following, commenting), and recommendation systems (i.e., interest-based and hot-score-based). Additionally, OASIS supports large-scale user simulations, capable of modeling up to one million users. With these features, OASIS can be easily extended to different social media platforms to study large-scale group phenomena and behaviors. We replicate various social phenomena, including information spreading, group polarization, and herd effects across X and Reddit platforms. Moreover, we provide observations of social phenomena at different agent group scales. 
We observe that the larger agent group scale leads to more enhanced group dynamics and more diverse and helpful agents&#39; opinions. These findings demonstrate OASIS&#39;s potential as a powerful tool for studying complex systems in digital environments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11581v4-abstract-full').style.display = 'none'; document.getElementById('2411.11581v4-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.03231">arXiv:2411.03231</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.03231">pdf</a>, <a href="https://arxiv.org/format/2411.03231">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Logic in Computer Science">cs.LO</span> </div> </div> <p class="title is-5 mathjax"> Formal Logic-guided Robust Federated Learning against Poisoning Attacks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Nguyen%2C+D+T">Dung Thuy Nguyen</a>, <a href="/search/cs?searchtype=author&amp;query=An%2C+Z">Ziyan An</a>, <a 
href="/search/cs?searchtype=author&amp;query=Johnson%2C+T+T">Taylor T. Johnson</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+M">Meiyi Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Leach%2C+K">Kevin Leach</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.03231v2-abstract-short" style="display: inline;"> Federated Learning (FL) offers a promising solution to the privacy concerns associated with centralized Machine Learning (ML) by enabling decentralized, collaborative learning. However, FL is vulnerable to various security threats, including poisoning attacks, where adversarial clients manipulate the training data or model updates to degrade overall model performance. Recognizing this threat, rese&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.03231v2-abstract-full').style.display = 'inline'; document.getElementById('2411.03231v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.03231v2-abstract-full" style="display: none;"> Federated Learning (FL) offers a promising solution to the privacy concerns associated with centralized Machine Learning (ML) by enabling decentralized, collaborative learning. However, FL is vulnerable to various security threats, including poisoning attacks, where adversarial clients manipulate the training data or model updates to degrade overall model performance. Recognizing this threat, researchers have focused on developing defense mechanisms to counteract poisoning attacks in FL systems. However, existing robust FL methods predominantly focus on computer vision tasks, leaving a gap in addressing the unique challenges of FL with time series data. 
In this paper, we present FLORAL, a defense mechanism designed to mitigate poisoning attacks in federated learning for time-series tasks, even in scenarios with heterogeneous client data and a large number of adversarial participants. Unlike traditional model-centric defenses, FLORAL leverages logical reasoning to evaluate client trustworthiness by aligning their predictions with global time-series patterns, rather than relying solely on the similarity of client updates. Our approach extracts logical reasoning properties from clients, then hierarchically infers global properties, and uses these to verify client updates. Through formal logic verification, we assess the robustness of each client contribution, identifying deviations indicative of adversarial behavior. Experimental results on two datasets demonstrate the superior performance of our approach compared to existing baseline methods, highlighting its potential to enhance the robustness of FL to time series applications. Notably, FLORAL reduced the prediction error by 93.27% in the best-case scenario compared to the second-best baseline. Our code is available at https://anonymous.4open.science/r/FLORAL-Robust-FTS. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.03231v2-abstract-full').style.display = 'none'; document.getElementById('2411.03231v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 5 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 4 figures, 6 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.02866">arXiv:2411.02866</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.02866">pdf</a>, <a href="https://arxiv.org/format/2411.02866">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> Double Whammy: Stealthy Data Manipulation aided Reconstruction Attack on Graph Federated Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chen%2C+J">Jinyin Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+M">Minying Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+H">Haibin Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Xuan%2C+Q">Qi Xuan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.02866v1-abstract-short" style="display: inline;"> Recent research has constructed successful graph reconstruction attack (GRA) on GFL. But these attacks are still challenged in aspects of effectiveness and stealth. To address the issues, we propose the first Data Manipulation aided Reconstruction attack on GFL, dubbed as DMan4Rec. 
The malicious client is born to manipulate its locally collected data to enhance graph stealing privacy from benign o&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02866v1-abstract-full').style.display = 'inline'; document.getElementById('2411.02866v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.02866v1-abstract-full" style="display: none;"> Recent research has constructed successful graph reconstruction attack (GRA) on GFL. But these attacks are still challenged in aspects of effectiveness and stealth. To address the issues, we propose the first Data Manipulation aided Reconstruction attack on GFL, dubbed as DMan4Rec. The malicious client is born to manipulate its locally collected data to enhance graph stealing privacy from benign ones, so as to construct double whammy on GFL. It differs from previous work in three terms: (1) effectiveness - to fully utilize the sparsity and feature smoothness of the graph, novel penalty terms are designed adaptive to diverse similarity functions for connected and unconnected node pairs, as well as incorporation label smoothing on top of the original cross-entropy loss. (2) scalability - DMan4Rec is capable of both white-box and black-box attacks via training a supervised model to infer the posterior probabilities obtained from limited queries (3) stealthiness - by manipulating the malicious client&#39;s node features, it can maintain the overall graph structure&#39;s invariance and conceal the attack. Comprehensive experiments on four real datasets and three GNN models demonstrate that DMan4Rec achieves the state-of-the-art (SOTA) attack performance, e.g., the attack AUC and precision improved by 9.2% and 10.5% respectively compared with the SOTA baselines. Particularly, DMan4Rec achieves an AUC score and a precision score of up to 99.59% and 99.56%, respectively in black-box setting. 
Nevertheless, the complete overlap of the distribution graphs supports the stealthiness of the attack. Besides, DMan4Rec still beats the defensive GFL, which alarms a new threat to GFL. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02866v1-abstract-full').style.display = 'none'; document.getElementById('2411.02866v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">The paper is currently being submitted for publication (The submitted journal is TNSE)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.00132">arXiv:2411.00132</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.00132">pdf</a>, <a href="https://arxiv.org/format/2411.00132">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Beyond Accuracy: Ensuring Correct Predictions With Correct Rationales </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+T">Tang Li</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+M">Mengmeng Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Peng%2C+X">Xi Peng</a> </p> <p class="abstract 
mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.00132v2-abstract-short" style="display: inline;"> Large pretrained foundation models demonstrate exceptional performance and, in some high-stakes applications, even surpass human experts. However, most of these models are currently evaluated primarily on prediction accuracy, overlooking the validity of the rationales behind their accurate predictions. For the safe deployment of foundation models, there is a pressing need to ensure double-correct&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.00132v2-abstract-full').style.display = 'inline'; document.getElementById('2411.00132v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.00132v2-abstract-full" style="display: none;"> Large pretrained foundation models demonstrate exceptional performance and, in some high-stakes applications, even surpass human experts. However, most of these models are currently evaluated primarily on prediction accuracy, overlooking the validity of the rationales behind their accurate predictions. For the safe deployment of foundation models, there is a pressing need to ensure double-correct predictions, i.e., correct prediction backed by correct rationales. To achieve this, we propose a two-phase scheme: First, we curate a new dataset that offers structured rationales for visual recognition tasks. Second, we propose a rationale-informed optimization method to guide the model in disentangling and localizing visual evidence for each rationale, without requiring manual annotations. Extensive experiments and ablation studies demonstrate that our model outperforms state-of-the-art models by up to 10.1% in prediction accuracy across a wide range of tasks. 
Furthermore, our method significantly improves the model&#39;s rationale correctness, improving localization by 7.5% and disentanglement by 36.5%. Our dataset, source code, and pretrained weights: https://github.com/deep-real/DCP <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.00132v2-abstract-full').style.display = 'none'; document.getElementById('2411.00132v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 31 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">In Proceedings of the 38th Conference on Neural Information Processing Systems (NeurIPS 2024)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.22597">arXiv:2410.22597</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.22597">pdf</a>, <a href="https://arxiv.org/format/2410.22597">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Are Large-Language Models Graph Algorithmic Reasoners? 
</p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Taylor%2C+A+K">Alexander K Taylor</a>, <a href="/search/cs?searchtype=author&amp;query=Cuturrufo%2C+A">Anthony Cuturrufo</a>, <a href="/search/cs?searchtype=author&amp;query=Yathish%2C+V">Vishal Yathish</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+M+D">Mingyu Derek Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+W">Wei Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.22597v1-abstract-short" style="display: inline;"> We seek to address a core challenge facing current Large Language Models (LLMs). LLMs have demonstrated superior performance in many tasks, yet continue to struggle with reasoning problems on explicit graphs that require multiple steps. To address this gap, we introduce a novel benchmark designed to evaluate LLM performance on classical algorithmic reasoning tasks on explicit graphs. Our benchmark&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.22597v1-abstract-full').style.display = 'inline'; document.getElementById('2410.22597v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.22597v1-abstract-full" style="display: none;"> We seek to address a core challenge facing current Large Language Models (LLMs). LLMs have demonstrated superior performance in many tasks, yet continue to struggle with reasoning problems on explicit graphs that require multiple steps. To address this gap, we introduce a novel benchmark designed to evaluate LLM performance on classical algorithmic reasoning tasks on explicit graphs. 
Our benchmark encompasses five fundamental algorithms: Breadth-First Search (BFS) and Depth-First Search (DFS) for connectivity, Dijkstra&#39;s algorithm and Floyd-Warshall algorithm for all nodes shortest path, and Prim&#39;s Minimum Spanning Tree (MST-Prim&#39;s) algorithm. Through extensive experimentation, we assess the capabilities of state-of-the-art LLMs in executing these algorithms step-by-step and systematically evaluate their performance at each stage. Our findings highlight the persistent challenges LLMs face in this domain and underscore the necessity for advanced prompting techniques and algorithmic instruction to enhance their graph reasoning abilities. This work presents MAGMA, the first comprehensive benchmark focused on LLMs completing classical graph algorithms, and provides a critical step toward understanding and improving their structured problem-solving skills. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.22597v1-abstract-full').style.display = 'none'; document.getElementById('2410.22597v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages, 13 Figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.21492">arXiv:2410.21492</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.21492">pdf</a>, <a href="https://arxiv.org/format/2410.21492">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> FATH: Authentication-based Test-time Defense against Indirect Prompt Injection Attacks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wang%2C+J">Jiongxiao Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+F">Fangzhou Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+W">Wendi Li</a>, <a href="/search/cs?searchtype=author&amp;query=Pan%2C+J">Jinsheng Pan</a>, <a href="/search/cs?searchtype=author&amp;query=Suh%2C+E">Edward Suh</a>, <a href="/search/cs?searchtype=author&amp;query=Mao%2C+Z+M">Z. Morley Mao</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+M">Muhao Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Xiao%2C+C">Chaowei Xiao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.21492v2-abstract-short" style="display: inline;"> Large language models (LLMs) have been widely deployed as the backbone with additional tools and text information for real-world applications. 
However, integrating external information into LLM-integrated applications raises significant security concerns. Among these, prompt injection attacks are particularly threatening, where malicious instructions injected in the external text information can e&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.21492v2-abstract-full').style.display = 'inline'; document.getElementById('2410.21492v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.21492v2-abstract-full" style="display: none;"> Large language models (LLMs) have been widely deployed as the backbone with additional tools and text information for real-world applications. However, integrating external information into LLM-integrated applications raises significant security concerns. Among these, prompt injection attacks are particularly threatening, where malicious instructions injected in the external text information can exploit LLMs to generate answers as the attackers desire. While both training-time and test-time defense methods have been developed to mitigate such attacks, the unaffordable training costs associated with training-time methods and the limited effectiveness of existing test-time methods make them impractical. This paper introduces a novel test-time defense strategy, named Formatting AuThentication with Hash-based tags (FATH). Unlike existing approaches that prevent LLMs from answering additional instructions in external text, our method implements an authentication system, requiring LLMs to answer all received instructions with a security policy and selectively filter out responses to user instructions as the final output. To achieve this, we utilize hash-based authentication tags to label each response, facilitating accurate identification of responses according to the user&#39;s instructions and improving the robustness against adaptive attacks. 
Comprehensive experiments demonstrate that our defense method can effectively defend against indirect prompt injection attacks, achieving state-of-the-art performance under Llama3 and GPT3.5 models across various attack methods. Our code is released at: https://github.com/Jayfeather1024/FATH <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.21492v2-abstract-full').style.display = 'none'; document.getElementById('2410.21492v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.18928">arXiv:2410.18928</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.18928">pdf</a>, <a href="https://arxiv.org/format/2410.18928">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Learning $k$-body Hamiltonians via compressed sensing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Ma%2C+M">Muzhou Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Flammia%2C+S+T">Steven T. 
Flammia</a>, <a href="/search/cs?searchtype=author&amp;query=Preskill%2C+J">John Preskill</a>, <a href="/search/cs?searchtype=author&amp;query=Tong%2C+Y">Yu Tong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.18928v2-abstract-short" style="display: inline;"> We study the problem of learning a $k$-body Hamiltonian with $M$ unknown Pauli terms that are not necessarily geometrically local. We propose a protocol that learns the Hamiltonian to precision $蔚$ with total evolution time ${\mathcal{O}}(M^{1/2+1/p}/蔚)$ up to logarithmic factors, where the error is quantified by the $\ell^p$-distance between Pauli coefficients. Our learning protocol uses only sin&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18928v2-abstract-full').style.display = 'inline'; document.getElementById('2410.18928v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.18928v2-abstract-full" style="display: none;"> We study the problem of learning a $k$-body Hamiltonian with $M$ unknown Pauli terms that are not necessarily geometrically local. We propose a protocol that learns the Hamiltonian to precision $蔚$ with total evolution time ${\mathcal{O}}(M^{1/2+1/p}/蔚)$ up to logarithmic factors, where the error is quantified by the $\ell^p$-distance between Pauli coefficients. Our learning protocol uses only single-qubit control operations and a GHZ state initial state, is non-adaptive, is robust against SPAM errors, and performs well even if $M$ and $k$ are not precisely known in advance or if the Hamiltonian is not exactly $M$-sparse. Methods from the classical theory of compressed sensing are used for efficiently identifying the $M$ terms in the Hamiltonian from among all possible $k$-body Pauli operators. 
We also provide a lower bound on the total evolution time needed in this learning task, and we discuss the operational interpretations of the $\ell^1$ and $\ell^2$ error metrics. In contrast to most previous works, our learning protocol requires neither geometric locality nor any other relaxed locality conditions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18928v2-abstract-full').style.display = 'none'; document.getElementById('2410.18928v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">49 pages, 1 figure</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.17709">arXiv:2410.17709</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.17709">pdf</a>, <a href="https://arxiv.org/format/2410.17709">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> Deoxys: A Causal Inference Engine for Unhealthy Node Mitigation in Large-scale Cloud Infrastructure </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+C">Chaoyun Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Yao%2C+R">Randolph 
Yao</a>, <a href="/search/cs?searchtype=author&amp;query=Qin%2C+S">Si Qin</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Z">Ze Li</a>, <a href="/search/cs?searchtype=author&amp;query=Agrawal%2C+S">Shekhar Agrawal</a>, <a href="/search/cs?searchtype=author&amp;query=Mishra%2C+B+R">Binit R. Mishra</a>, <a href="/search/cs?searchtype=author&amp;query=Tran%2C+T">Tri Tran</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+M">Minghua Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Lin%2C+Q">Qingwei Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Chintalapati%2C+M">Murali Chintalapati</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+D">Dongmei Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.17709v1-abstract-short" style="display: inline;"> The presence of unhealthy nodes in cloud infrastructure signals the potential failure of machines, which can significantly impact the availability and reliability of cloud services, resulting in negative customer experiences. Effectively addressing unhealthy node mitigation is therefore vital for sustaining cloud system performance. This paper introduces Deoxys, a causal inference engine tailored&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.17709v1-abstract-full').style.display = 'inline'; document.getElementById('2410.17709v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.17709v1-abstract-full" style="display: none;"> The presence of unhealthy nodes in cloud infrastructure signals the potential failure of machines, which can significantly impact the availability and reliability of cloud services, resulting in negative customer experiences. 
Effectively addressing unhealthy node mitigation is therefore vital for sustaining cloud system performance. This paper introduces Deoxys, a causal inference engine tailored to recommending mitigation actions for unhealthy node in cloud systems to minimize virtual machine downtime and interruptions during unhealthy events. It employs double machine learning combined with causal forest to produce precise and reliable mitigation recommendations based solely on limited observational data collected from the historical unhealthy events. To enhance the causal inference model, Deoxys further incorporates a policy fallback mechanism based on model uncertainty and action overriding mechanisms to (i) improve the reliability of the system, and (ii) strike a good tradeoff between downtime reduction and resource utilization, thereby enhancing the overall system performance. After deploying Deoxys in a large-scale cloud infrastructure at Microsoft, our observations demonstrate that Deoxys significantly reduces average VM downtime by 53% compared to a legacy policy, while leading to 49.5% lower VM interruption rate. This substantial improvement enhances the reliability and stability of cloud platforms, resulting in a seamless customer experience. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.17709v1-abstract-full').style.display = 'none'; document.getElementById('2410.17709v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.12592">arXiv:2410.12592</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.12592">pdf</a>, <a href="https://arxiv.org/format/2410.12592">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Cocoon: Robust Multi-Modal Perception with Uncertainty-Aware Sensor Fusion </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cho%2C+M">Minkyoung Cho</a>, <a href="/search/cs?searchtype=author&amp;query=Cao%2C+Y">Yulong Cao</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+J">Jiachen Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Q">Qingzhao Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Pavone%2C+M">Marco Pavone</a>, <a href="/search/cs?searchtype=author&amp;query=Park%2C+J+J">Jeong Joon Park</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+H">Heng Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Mao%2C+Z+M">Z. Morley Mao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.12592v1-abstract-short" style="display: inline;"> An important paradigm in 3D object detection is the use of multiple modalities to enhance accuracy in both normal and challenging conditions, particularly for long-tail scenarios. 
To address this, recent studies have explored two directions of adaptive approaches: MoE-based adaptive fusion, which struggles with uncertainties arising from distinct object configurations, and late fusion for output-l&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.12592v1-abstract-full').style.display = 'inline'; document.getElementById('2410.12592v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.12592v1-abstract-full" style="display: none;"> An important paradigm in 3D object detection is the use of multiple modalities to enhance accuracy in both normal and challenging conditions, particularly for long-tail scenarios. To address this, recent studies have explored two directions of adaptive approaches: MoE-based adaptive fusion, which struggles with uncertainties arising from distinct object configurations, and late fusion for output-level adaptive fusion, which relies on separate detection pipelines and limits comprehensive understanding. In this work, we introduce Cocoon, an object- and feature-level uncertainty-aware fusion framework. The key innovation lies in uncertainty quantification for heterogeneous representations, enabling fair comparison across modalities through the introduction of a feature aligner and a learnable surrogate ground truth, termed feature impression. We also define a training objective to ensure that their relationship provides a valid metric for uncertainty quantification. Cocoon consistently outperforms existing static and adaptive methods in both normal and challenging conditions, including those with natural and artificial corruptions. Furthermore, we show the validity and efficacy of our uncertainty metric across diverse datasets. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.12592v1-abstract-full').style.display = 'none'; document.getElementById('2410.12592v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">23 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.12224">arXiv:2410.12224</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.12224">pdf</a>, <a href="https://arxiv.org/format/2410.12224">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Methodology">stat.ME</span> </div> </div> <p class="title is-5 mathjax"> Causally-Aware Unsupervised Feature Selection Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Shen%2C+Z">Zongxin Shen</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+Y">Yanyong Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+D">Dongjie Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+M">Minbo Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Lv%2C+F">Fengmao Lv</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+T">Tianrui Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.12224v2-abstract-short" 
style="display: inline;"> Unsupervised feature selection (UFS) has recently gained attention for its effectiveness in processing unlabeled high-dimensional data. However, existing methods overlook the intrinsic causal mechanisms within the data, resulting in the selection of irrelevant features and poor interpretability. Additionally, previous graph-based methods fail to account for the differing impacts of non-causal and&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.12224v2-abstract-full').style.display = 'inline'; document.getElementById('2410.12224v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.12224v2-abstract-full" style="display: none;"> Unsupervised feature selection (UFS) has recently gained attention for its effectiveness in processing unlabeled high-dimensional data. However, existing methods overlook the intrinsic causal mechanisms within the data, resulting in the selection of irrelevant features and poor interpretability. Additionally, previous graph-based methods fail to account for the differing impacts of non-causal and causal features in constructing the similarity graph, which leads to false links in the generated graph. To address these issues, a novel UFS method, called Causally-Aware UnSupErvised Feature Selection learning (CAUSE-FS), is proposed. CAUSE-FS introduces a novel causal regularizer that reweights samples to balance the confounding distribution of each treatment feature. This regularizer is subsequently integrated into a generalized unsupervised spectral regression model to mitigate spurious associations between features and clustering labels, thus achieving causal feature selection. Furthermore, CAUSE-FS employs causality-guided hierarchical clustering to partition features with varying causal contributions into multiple granularities. 
By integrating similarity graphs learned adaptively at different granularities, CAUSE-FS increases the importance of causal features when constructing the fused similarity graph to capture the reliable local structure of data. Extensive experimental results demonstrate the superiority of CAUSE-FS over state-of-the-art methods, with its interpretability further validated through feature visualization. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.12224v2-abstract-full').style.display = 'none'; document.getElementById('2410.12224v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.11795">arXiv:2410.11795</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.11795">pdf</a>, <a href="https://arxiv.org/format/2410.11795">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Efficient Diffusion Models: A Comprehensive Survey from Principles to Practices </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Ma%2C+Z">Zhiyuan Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Y">Yuzhu Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Jia%2C+G">Guoli Jia</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+L">Liangliang Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+Y">Yichao Ma</a>, <a 
href="/search/cs?searchtype=author&amp;query=Ma%2C+M">Mingjie Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+G">Gaofeng Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+K">Kaiyan Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jianjun Li</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+B">Bowen Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.11795v2-abstract-short" style="display: inline;"> As one of the most popular and sought-after generative models in the recent years, diffusion models have sparked the interests of many researchers and steadily shown excellent advantage in various generative tasks such as image synthesis, video generation, molecule design, 3D scene rendering and multimodal generation, relying on their dense theoretical principles and reliable application practices&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.11795v2-abstract-full').style.display = 'inline'; document.getElementById('2410.11795v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.11795v2-abstract-full" style="display: none;"> As one of the most popular and sought-after generative models in the recent years, diffusion models have sparked the interests of many researchers and steadily shown excellent advantage in various generative tasks such as image synthesis, video generation, molecule design, 3D scene rendering and multimodal generation, relying on their dense theoretical principles and reliable application practices. The remarkable success of these recent efforts on diffusion models comes largely from progressive design principles and efficient architecture, training, inference, and deployment methodologies. 
However, there has not been a comprehensive and in-depth review to summarize these principles and practices to help the rapid understanding and application of diffusion models. In this survey, we provide a new efficiency-oriented perspective on these existing efforts, which mainly focuses on the profound principles and efficient practices in architecture designs, model training, fast inference and reliable deployment, to guide further theoretical research, algorithm migration and model application for new scenarios in a reader-friendly way. \url{https://github.com/ponyzym/Efficient-DMs-Survey} <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.11795v2-abstract-full').style.display = 'none'; document.getElementById('2410.11795v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.4.9 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.11293">arXiv:2410.11293</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.11293">pdf</a>, <a href="https://arxiv.org/format/2410.11293">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> TraM : Enhancing User Sleep Prediction with Transformer-based Multivariate Time Series Modeling and Machine Learning Ensembles </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Kim%2C+J">Jinjae Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+M">Minjeong Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Choi%2C+E">Eunjee Choi</a>, <a href="/search/cs?searchtype=author&amp;query=Cho%2C+K">Keunhee Cho</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+C">Chanwoo Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.11293v1-abstract-short" style="display: inline;"> This paper presents a novel approach that leverages Transformer-based multivariate time series model and Machine Learning Ensembles to predict the quality of human sleep, emotional states, and stress levels. A formula to calculate the labels was developed, and the various models were applied to user data. 
Time Series Transformer was used for labels where time series characteristics are crucial, wh&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.11293v1-abstract-full').style.display = 'inline'; document.getElementById('2410.11293v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.11293v1-abstract-full" style="display: none;"> This paper presents a novel approach that leverages Transformer-based multivariate time series model and Machine Learning Ensembles to predict the quality of human sleep, emotional states, and stress levels. A formula to calculate the labels was developed, and the various models were applied to user data. Time Series Transformer was used for labels where time series characteristics are crucial, while Machine Learning Ensembles were employed for labels requiring comprehensive daily activity statistics. Time Series Transformer excels in capturing the characteristics of time series through pre-training, while Machine Learning Ensembles select machine learning models that meet our categorization criteria. The proposed model, TraM, scored 6.10 out of 10 in experiments, demonstrating superior performance compared to other methodologies. The code and configuration for the TraM framework are available at: https://github.com/jin-jae/ETRI-Paper-Contest. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.11293v1-abstract-full').style.display = 'none'; document.getElementById('2410.11293v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.09904">arXiv:2410.09904</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.09904">pdf</a>, <a href="https://arxiv.org/format/2410.09904">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Logic in Computer Science">cs.LO</span> </div> </div> <p class="title is-5 mathjax"> Equitable Access to Justice: Logical LLMs Show Promise </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Kant%2C+M">Manuj Kant</a>, <a href="/search/cs?searchtype=author&amp;query=Kant%2C+M">Manav Kant</a>, <a href="/search/cs?searchtype=author&amp;query=Nabi%2C+M">Marzieh Nabi</a>, <a href="/search/cs?searchtype=author&amp;query=Carlson%2C+P">Preston Carlson</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+M">Megan Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.09904v1-abstract-short" style="display: inline;"> The costs and complexity of the American judicial system limit access to legal solutions for many Americans. Large language models (LLMs) hold great potential to improve access to justice. However, a major challenge in applying AI and LLMs in legal contexts, where consistency and reliability are crucial, is the need for System 2 reasoning. 
In this paper, we explore the integration of LLMs with log&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.09904v1-abstract-full').style.display = 'inline'; document.getElementById('2410.09904v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.09904v1-abstract-full" style="display: none;"> The costs and complexity of the American judicial system limit access to legal solutions for many Americans. Large language models (LLMs) hold great potential to improve access to justice. However, a major challenge in applying AI and LLMs in legal contexts, where consistency and reliability are crucial, is the need for System 2 reasoning. In this paper, we explore the integration of LLMs with logic programming to enhance their ability to reason, bringing their strategic capabilities closer to that of a skilled lawyer. Our objective is to translate laws and contracts into logic programs that can be applied to specific legal cases, with a focus on insurance contracts. We demonstrate that while GPT-4o fails to encode a simple health insurance contract into logical code, the recently released OpenAI o1-preview model succeeds, exemplifying how LLMs with advanced System 2 reasoning capabilities can expand access to justice. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.09904v1-abstract-full').style.display = 'none'; document.getElementById('2410.09904v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.08475">arXiv:2410.08475</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.08475">pdf</a>, <a href="https://arxiv.org/format/2410.08475">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> GIVE: Structured Reasoning of Large Language Models with Knowledge Graph Inspired Veracity Extrapolation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=He%2C+J">Jiashu He</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+M+D">Mingyu Derek Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Fan%2C+J">Jinxuan Fan</a>, <a href="/search/cs?searchtype=author&amp;query=Roth%2C+D">Dan Roth</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+W">Wei Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Ribeiro%2C+A">Alejandro Ribeiro</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.08475v2-abstract-short" style="display: inline;"> Existing approaches based on context prompting or reinforcement learning (RL) to improve the reasoning capacities of large language models (LLMs) depend on the LLMs&#39; internal knowledge to produce reliable Chain-Of-Thought (CoT). However, no matter the size of LLMs, certain problems cannot be resolved in a single forward pass. 
Meanwhile, agent-based reasoning systems require access to a comprehensi&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.08475v2-abstract-full').style.display = 'inline'; document.getElementById('2410.08475v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.08475v2-abstract-full" style="display: none;"> Existing approaches based on context prompting or reinforcement learning (RL) to improve the reasoning capacities of large language models (LLMs) depend on the LLMs&#39; internal knowledge to produce reliable Chain-Of-Thought (CoT). However, no matter the size of LLMs, certain problems cannot be resolved in a single forward pass. Meanwhile, agent-based reasoning systems require access to a comprehensive nonparametric knowledge base, which is often costly or not feasible for use in scientific and niche domains. We present Graph Inspired Veracity Extrapolation (GIVE), a novel reasoning method that merges parametric and non-parametric memories to improve accurate reasoning with minimal external input. GIVE guides the LLM agent to select the most pertinent expert data (observe), engage in query-specific divergent thinking (reflect), and then synthesize this information to produce the final output (speak). Extensive experiments demonstrated the following benefits of our framework: (1) GIVE boosts the performance of LLMs across various sizes. (2) In some scenarios, GIVE allows smaller LLMs to surpass larger, more sophisticated ones in scientific tasks (GPT3.5T + GIVE &gt; GPT4). (3) GIVE is effective on scientific and open-domain assessments. (4) GIVE is a training-free method that enables LLMs to tackle new problems that extend beyond their training data (up to 43.5% -&gt; 88.2% accuracy improvement). 
(5) GIVE allows LLM agents to reason using both restricted (very small) and noisy (very large) knowledge sources, accommodating knowledge graphs (KG) ranging from 135 to more than 840k nodes. (6) The reasoning process involved in GIVE is fully interpretable. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.08475v2-abstract-full').style.display = 'none'; document.getElementById('2410.08475v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 10 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.08082">arXiv:2410.08082</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.08082">pdf</a>, <a href="https://arxiv.org/format/2410.08082">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> ToMiE: Towards Modular Growth in Enhanced SMPL Skeleton for 3D Human with Animatable Garments </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhan%2C+Y">Yifan Zhan</a>, <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+Q">Qingtian Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Niu%2C+M">Muyao Niu</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+M">Mingze Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+J">Jiancheng Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Zhong%2C+Z">Zhihang Zhong</a>, <a 
href="/search/cs?searchtype=author&amp;query=Sun%2C+X">Xiao Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Qiao%2C+Y">Yu Qiao</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+Y">Yinqiang Zheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.08082v1-abstract-short" style="display: inline;"> In this paper, we highlight a critical yet often overlooked factor in most 3D human tasks, namely modeling humans with complex garments. It is known that the parameterized formulation of SMPL is able to fit human skin; while complex garments, e.g., hand-held objects and loose-fitting garments, are difficult to get modeled within the unified framework, since their movements are usually decoupled wi&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.08082v1-abstract-full').style.display = 'inline'; document.getElementById('2410.08082v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.08082v1-abstract-full" style="display: none;"> In this paper, we highlight a critical yet often overlooked factor in most 3D human tasks, namely modeling humans with complex garments. It is known that the parameterized formulation of SMPL is able to fit human skin; while complex garments, e.g., hand-held objects and loose-fitting garments, are difficult to get modeled within the unified framework, since their movements are usually decoupled with the human body. To enhance the capability of SMPL skeleton in response to this situation, we propose a modular growth strategy that enables the joint tree of the skeleton to expand adaptively. Specifically, our method, called ToMiE, consists of parent joints localization and external joints optimization. 
For parent joints localization, we employ a gradient-based approach guided by both LBS blending weights and motion kernels. Once the external joints are obtained, we proceed to optimize their transformations in SE(3) across different frames, enabling rendering and explicit animation. ToMiE manages to outperform other methods across various cases with garments, not only in rendering quality but also by offering free animation of grown joints, thereby enhancing the expressive ability of SMPL skeleton for a broader range of applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.08082v1-abstract-full').style.display = 'none'; document.getElementById('2410.08082v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.07265">arXiv:2410.07265</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.07265">pdf</a>, <a href="https://arxiv.org/format/2410.07265">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> A Survey: Collaborative Hardware and Software Design in the Era of Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Guo%2C+C">Cong Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+F">Feng Cheng</a>, <a href="/search/cs?searchtype=author&amp;query=Du%2C+Z">Zhixu Du</a>, <a href="/search/cs?searchtype=author&amp;query=Kiessling%2C+J">James Kiessling</a>, <a href="/search/cs?searchtype=author&amp;query=Ku%2C+J">Jonathan Ku</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+S">Shiyu Li</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Z">Ziru Li</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+M">Mingyuan Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Molom-Ochir%2C+T">Tergel Molom-Ochir</a>, <a href="/search/cs?searchtype=author&amp;query=Morris%2C+B">Benjamin Morris</a>, <a href="/search/cs?searchtype=author&amp;query=Shan%2C+H">Haoxuan Shan</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+J">Jingwei Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Yitu Wang</a>, <a 
href="/search/cs?searchtype=author&amp;query=Wei%2C+C">Chiyue Wei</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+X">Xueying Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+Y">Yuhao Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+H+F">Hao Frank Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+J">Jingyang Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+J">Junyao Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+Q">Qilin Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+G">Guanglei Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Hai"> Hai</a>, <a href="/search/cs?searchtype=author&amp;query=Li"> Li</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Y">Yiran Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.07265v1-abstract-short" style="display: inline;"> The rapid development of large language models (LLMs) has significantly transformed the field of artificial intelligence, demonstrating remarkable capabilities in natural language processing and moving towards multi-modal functionality. These models are increasingly integrated into diverse applications, impacting both research and industry. 
However, their development and deployment present substan&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.07265v1-abstract-full').style.display = 'inline'; document.getElementById('2410.07265v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.07265v1-abstract-full" style="display: none;"> The rapid development of large language models (LLMs) has significantly transformed the field of artificial intelligence, demonstrating remarkable capabilities in natural language processing and moving towards multi-modal functionality. These models are increasingly integrated into diverse applications, impacting both research and industry. However, their development and deployment present substantial challenges, including the need for extensive computational resources, high energy consumption, and complex software optimizations. Unlike traditional deep learning systems, LLMs require unique optimization strategies for training and inference, focusing on system-level efficiency. This paper surveys hardware and software co-design approaches specifically tailored to address the unique characteristics and constraints of large language models. This survey analyzes the challenges and impacts of LLMs on hardware and algorithm research, exploring algorithm optimization, hardware design, and system-level innovations. It aims to provide a comprehensive understanding of the trade-offs and considerations in LLM-centric computing systems, guiding future advancements in AI. Finally, we summarize the existing efforts in this space and outline future directions toward realizing production-grade co-design methodologies for the next generation of large language models and AI systems. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.07265v1-abstract-full').style.display = 'none'; document.getElementById('2410.07265v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by IEEE Circuits and Systems Magazine</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.04524">arXiv:2410.04524</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.04524">pdf</a>, <a href="https://arxiv.org/format/2410.04524">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Toward Secure Tuning: Mitigating Security Risks from Instruction Fine-Tuning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Du%2C+Y">Yanrui Du</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+S">Sendong Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Cao%2C+J">Jiawei Cao</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+M">Ming Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+D">Danyang Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Qi%2C+S">Shuren Qi</a>, <a href="/search/cs?searchtype=author&amp;query=Fan%2C+F">Fenglei Fan</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+T">Ting Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Qin%2C+B">Bing Qin</a> </p> <p class="abstract 
mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.04524v2-abstract-short" style="display: inline;"> Instruction fine-tuning has emerged as a critical technique for customizing Large Language Models (LLMs) to specific applications. However, recent studies have highlighted significant security vulnerabilities in fine-tuned LLMs. Existing defense efforts focus more on pre-training and post-training methods, yet in-training methods remain underexplored. To fill this gap, we introduce a nov&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04524v2-abstract-full').style.display = 'inline'; document.getElementById('2410.04524v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.04524v2-abstract-full" style="display: none;"> Instruction fine-tuning has emerged as a critical technique for customizing Large Language Models (LLMs) to specific applications. However, recent studies have highlighted significant security vulnerabilities in fine-tuned LLMs. Existing defense efforts focus more on pre-training and post-training methods, yet in-training methods remain underexplored. To fill this gap, we introduce a novel secure-tuning strategy called SWAT. By analyzing how module-level parameters (e.g. Q/K/V/O) affect the security feature space drift, we identify a robust subset of modules, termed Mods_Rob. Our SWAT strategy begins by warming up Mods_Rob to capture low-level features with minimal security risks, followed by training all parameters to achieve optimal task performance. Essentially, this strategy shifts the early learning burden more from global parameters to Mods_Rob, reducing update magnitudes of the non-robust subset.
Across various datasets, scenarios, and LLMs, our strategy has demonstrated significant success in mitigating security risks while preserving task performance. Importantly, it can be seamlessly integrated with pre-training and post-training methods, leading to greater improvements. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04524v2-abstract-full').style.display = 'none'; document.getElementById('2410.04524v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Ma%2C+M&amp;start=50" class="pagination-next">Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Ma%2C+M&amp;start=0" class="pagination-link is-current" aria-label="Page 1" aria-current="page">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Ma%2C+M&amp;start=50" class="pagination-link" aria-label="Goto page 2">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Ma%2C+M&amp;start=100" class="pagination-link" aria-label="Goto page 3">3 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Ma%2C+M&amp;start=150" class="pagination-link" aria-label="Goto page 4">4 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Ma%2C+M&amp;start=200" class="pagination-link" aria-label="Goto page 5">5 </a> </li> <li><span class="pagination-ellipsis">&hellip;</span></li> </ul> </nav> <div
class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> 
<li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 
0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

Pages: 1 2 3 4 5 6 7 8 9 10