<!-- capture residue ("CINXE.COM" banner and a duplicated page title) preserved as
     comments: stray text before the doctype would force browsers into quirks mode -->
<!-- CINXE.COM -->
<!-- Search | arXiv e-print repository -->
<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script 
src="https://static.arxiv.org/MathJax-2.7.3/MathJax.js"></script> <!-- explicit https: the protocol-relative "//" URL fails when the page is opened from file:// or any non-http context --> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <!-- "radio" is not an HTML element name, so the original selector (radio#cf-customfield_11400) never matched anything; the rule is meant to hide the input with that id --> <style> input#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. 
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" 
role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;50 of 469 results for author: <span class="mathjax">Shi, W</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> <div class="content"> <!-- role="search": the original used "aria-role", which is not a real attribute and is ignored by assistive technology; the landmark is conveyed by the native role attribute --> <form method="GET" action="/search/cs" role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&amp;query=Shi%2C+W">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Shi, W"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Shi%2C+W&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option 
value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Shi, W"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <!-- aria-current="page" belongs on the current page's link only (page 1, which also carries the is-current class); it was previously set on pages 2-5 and missing from page 1, telling screen readers the wrong page was active --> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Shi%2C+W&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Shi%2C+W&amp;start=0" class="pagination-link is-current" aria-label="Page 1" aria-current="page">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Shi%2C+W&amp;start=50" class="pagination-link " aria-label="Page 2">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Shi%2C+W&amp;start=100" class="pagination-link " aria-label="Page 3">3 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Shi%2C+W&amp;start=150" class="pagination-link " aria-label="Page 4">4 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Shi%2C+W&amp;start=200" class="pagination-link " aria-label="Page 5">5 </a> </li> <li><span class="pagination-ellipsis">&hellip;</span></li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.17604">arXiv:2502.17604</a> <span>&nbsp;[<a 
href="https://arxiv.org/pdf/2502.17604">pdf</a>, <a href="https://arxiv.org/format/2502.17604">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> </div> </div> <p class="title is-5 mathjax"> Weaving the Cosmos: WASM-Powered Interchain Communication for AI Enabled Smart Contracts </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Karanjai%2C+R">Rabimba Karanjai</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+L">Lei Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+W">Weidong Shi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.17604v1-abstract-short" style="display: inline;"> In this era, significant transformations in industries and tool utilization are driven by AI/Large Language Models (LLMs) and advancements in Machine Learning. There&#39;s a growing emphasis on Machine Learning Operations(MLOps) for managing and deploying these AI models. Concurrently, the imperative for richer smart contracts and on-chain computation is escalating. Our paper introduces an innovative&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.17604v1-abstract-full').style.display = 'inline'; document.getElementById('2502.17604v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.17604v1-abstract-full" style="display: none;"> In this era, significant transformations in industries and tool utilization are driven by AI/Large Language Models (LLMs) and advancements in Machine Learning. 
There&#39;s a growing emphasis on Machine Learning Operations(MLOps) for managing and deploying these AI models. Concurrently, the imperative for richer smart contracts and on-chain computation is escalating. Our paper introduces an innovative framework that integrates blockchain technology, particularly the Cosmos SDK, to facilitate on-chain AI inferences. This system, built on WebAssembly (WASM), enables interchain communication and deployment of WASM modules executing AI inferences across multiple blockchain nodes. We critically assess the framework from feasibility, scalability, and model security, with a special focus on its portability and engine-model agnostic deployment. The capability to support AI on-chain may enhance and expand the scope of smart contracts, and as a result enable new use cases and applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.17604v1-abstract-full').style.display = 'none'; document.getElementById('2502.17604v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.14354">arXiv:2502.14354</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.14354">pdf</a>, <a href="https://arxiv.org/format/2502.14354">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Self-Improvement Towards Pareto Optimality: Mitigating Preference Conflicts in Multi-Objective Alignment </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+M">Moxin Li</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Y">Yuantao Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+W">Wenjie Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+W">Wentao Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Z">Zhuo Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Feng%2C+F">Fuli Feng</a>, <a href="/search/cs?searchtype=author&amp;query=Chua%2C+T">Tat-Seng Chua</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.14354v1-abstract-short" style="display: inline;"> Multi-Objective Alignment (MOA) aims to align LLMs&#39; responses with multiple human preference objectives, with Direct Preference Optimization (DPO) emerging as a prominent approach. However, we find that DPO-based MOA approaches suffer from widespread preference conflicts in the data, where different objectives favor different responses. 
This results in conflicting optimization directions, hinderin&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.14354v1-abstract-full').style.display = 'inline'; document.getElementById('2502.14354v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.14354v1-abstract-full" style="display: none;"> Multi-Objective Alignment (MOA) aims to align LLMs&#39; responses with multiple human preference objectives, with Direct Preference Optimization (DPO) emerging as a prominent approach. However, we find that DPO-based MOA approaches suffer from widespread preference conflicts in the data, where different objectives favor different responses. This results in conflicting optimization directions, hindering the optimization on the Pareto Front. To address this, we propose to construct Pareto-optimal responses to resolve preference conflicts. To efficiently obtain and utilize such responses, we propose a self-improving DPO framework that enables LLMs to self-generate and select Pareto-optimal responses for self-supervised preference alignment. Extensive experiments on two datasets demonstrate the superior Pareto Front achieved by our framework compared to various baselines. Code is available at \url{https://github.com/zyttt-coder/SIPO}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.14354v1-abstract-full').style.display = 'none'; document.getElementById('2502.14354v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Under review</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.14296">arXiv:2502.14296</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.14296">pdf</a>, <a href="https://arxiv.org/format/2502.14296">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> On the Trustworthiness of Generative Foundation Models: Guideline, Assessment, and Perspective </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Huang%2C+Y">Yue Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Gao%2C+C">Chujie Gao</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+S">Siyuan Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+H">Haoran Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+X">Xiangqi Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+Y">Yujun Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Yanbo Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Ye%2C+J">Jiayi Ye</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+J">Jiawen Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Q">Qihui Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Y">Yuan Li</a>, <a href="/search/cs?searchtype=author&amp;query=Bao%2C+H">Han Bao</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Z">Zhaoyi Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Guan%2C+T">Tianrui Guan</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+D">Dongping Chen</a>, <a 
href="/search/cs?searchtype=author&amp;query=Chen%2C+R">Ruoxi Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+K">Kehan Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Zou%2C+A">Andy Zou</a>, <a href="/search/cs?searchtype=author&amp;query=Kuen-Yew%2C+B+H">Bryan Hooi Kuen-Yew</a>, <a href="/search/cs?searchtype=author&amp;query=Xiong%2C+C">Caiming Xiong</a>, <a href="/search/cs?searchtype=author&amp;query=Stengel-Eskin%2C+E">Elias Stengel-Eskin</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+H">Hongyang Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Yin%2C+H">Hongzhi Yin</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+H">Huan Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Yao%2C+H">Huaxiu Yao</a> , et al. (41 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.14296v1-abstract-short" style="display: inline;"> Generative Foundation Models (GenFMs) have emerged as transformative tools. However, their widespread adoption raises critical concerns regarding trustworthiness across dimensions. This paper presents a comprehensive framework to address these challenges through three key contributions. First, we systematically review global AI governance laws and policies from governments and regulatory bodies, a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.14296v1-abstract-full').style.display = 'inline'; document.getElementById('2502.14296v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.14296v1-abstract-full" style="display: none;"> Generative Foundation Models (GenFMs) have emerged as transformative tools. However, their widespread adoption raises critical concerns regarding trustworthiness across dimensions. 
This paper presents a comprehensive framework to address these challenges through three key contributions. First, we systematically review global AI governance laws and policies from governments and regulatory bodies, as well as industry practices and standards. Based on this analysis, we propose a set of guiding principles for GenFMs, developed through extensive multidisciplinary collaboration that integrates technical, ethical, legal, and societal perspectives. Second, we introduce TrustGen, the first dynamic benchmarking platform designed to evaluate trustworthiness across multiple dimensions and model types, including text-to-image, large language, and vision-language models. TrustGen leverages modular components--metadata curation, test case generation, and contextual variation--to enable adaptive and iterative assessments, overcoming the limitations of static evaluation methods. Using TrustGen, we reveal significant progress in trustworthiness while identifying persistent challenges. Finally, we provide an in-depth discussion of the challenges and future directions for trustworthy GenFMs, which reveals the complex, evolving nature of trustworthiness, highlighting the nuanced trade-offs between utility and trustworthiness, and consideration for various downstream applications, identifying persistent challenges and providing a strategic roadmap for future research. This work establishes a holistic framework for advancing trustworthiness in GenAI, paving the way for safer and more responsible integration of GenFMs into critical applications. To facilitate advancement in the community, we release the toolkit for dynamic evaluation. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.14296v1-abstract-full').style.display = 'none'; document.getElementById('2502.14296v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.13595">arXiv:2502.13595</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.13595">pdf</a>, <a href="https://arxiv.org/format/2502.13595">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> MMTEB: Massive Multilingual Text Embedding Benchmark </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Enevoldsen%2C+K">Kenneth Enevoldsen</a>, <a href="/search/cs?searchtype=author&amp;query=Chung%2C+I">Isaac Chung</a>, <a href="/search/cs?searchtype=author&amp;query=Kerboua%2C+I">Imene Kerboua</a>, <a href="/search/cs?searchtype=author&amp;query=Kardos%2C+M">Márton Kardos</a>, <a href="/search/cs?searchtype=author&amp;query=Mathur%2C+A">Ashwin Mathur</a>, <a href="/search/cs?searchtype=author&amp;query=Stap%2C+D">David Stap</a>, <a href="/search/cs?searchtype=author&amp;query=Gala%2C+J">Jay Gala</a>, <a href="/search/cs?searchtype=author&amp;query=Siblini%2C+W">Wissam Siblini</a>, <a 
href="/search/cs?searchtype=author&amp;query=Krzemi%C5%84ski%2C+D">Dominik Krzemiński</a>, <a href="/search/cs?searchtype=author&amp;query=Winata%2C+G+I">Genta Indra Winata</a>, <a href="/search/cs?searchtype=author&amp;query=Sturua%2C+S">Saba Sturua</a>, <a href="/search/cs?searchtype=author&amp;query=Utpala%2C+S">Saiteja Utpala</a>, <a href="/search/cs?searchtype=author&amp;query=Ciancone%2C+M">Mathieu Ciancone</a>, <a href="/search/cs?searchtype=author&amp;query=Schaeffer%2C+M">Marion Schaeffer</a>, <a href="/search/cs?searchtype=author&amp;query=Sequeira%2C+G">Gabriel Sequeira</a>, <a href="/search/cs?searchtype=author&amp;query=Misra%2C+D">Diganta Misra</a>, <a href="/search/cs?searchtype=author&amp;query=Dhakal%2C+S">Shreeya Dhakal</a>, <a href="/search/cs?searchtype=author&amp;query=Rystr%C3%B8m%2C+J">Jonathan Rystrøm</a>, <a href="/search/cs?searchtype=author&amp;query=Solomatin%2C+R">Roman Solomatin</a>, <a href="/search/cs?searchtype=author&amp;query=%C3%87a%C4%9Fatan%2C+%C3%96">Ömer Çağatan</a>, <a href="/search/cs?searchtype=author&amp;query=Kundu%2C+A">Akash Kundu</a>, <a href="/search/cs?searchtype=author&amp;query=Bernstorff%2C+M">Martin Bernstorff</a>, <a href="/search/cs?searchtype=author&amp;query=Xiao%2C+S">Shitao Xiao</a>, <a href="/search/cs?searchtype=author&amp;query=Sukhlecha%2C+A">Akshita Sukhlecha</a>, <a href="/search/cs?searchtype=author&amp;query=Pahwa%2C+B">Bhavish Pahwa</a> , et al. (61 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.13595v1-abstract-short" style="display: inline;"> Text embeddings are typically evaluated on a limited set of tasks, which are constrained by language, domain, and task diversity. 
To address these limitations and provide a more comprehensive evaluation, we introduce the Massive Multilingual Text Embedding Benchmark (MMTEB) - a large-scale, community-driven expansion of MTEB, covering over 500 quality-controlled evaluation tasks across 250+ langua&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13595v1-abstract-full').style.display = 'inline'; document.getElementById('2502.13595v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.13595v1-abstract-full" style="display: none;"> Text embeddings are typically evaluated on a limited set of tasks, which are constrained by language, domain, and task diversity. To address these limitations and provide a more comprehensive evaluation, we introduce the Massive Multilingual Text Embedding Benchmark (MMTEB) - a large-scale, community-driven expansion of MTEB, covering over 500 quality-controlled evaluation tasks across 250+ languages. MMTEB includes a diverse set of challenging, novel tasks such as instruction following, long-document retrieval, and code retrieval, representing the largest multilingual collection of evaluation tasks for embedding models to date. Using this collection, we develop several highly multilingual benchmarks, which we use to evaluate a representative set of models. We find that while large language models (LLMs) with billions of parameters can achieve state-of-the-art performance on certain language subsets and task categories, the best-performing publicly available model is multilingual-e5-large-instruct with only 560 million parameters. To facilitate accessibility and reduce computational cost, we introduce a novel downsampling method based on inter-task correlation, ensuring a diverse selection while preserving relative model rankings. 
Furthermore, we optimize tasks such as retrieval by sampling hard negatives, creating smaller but effective splits. These optimizations allow us to introduce benchmarks that drastically reduce computational demands. For instance, our newly introduced zero-shot English benchmark maintains a ranking order similar to the full-scale version but at a fraction of the computational cost. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13595v1-abstract-full').style.display = 'none'; document.getElementById('2502.13595v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted for ICLR: https://openreview.net/forum?id=zl3pfz4VCV</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11375">arXiv:2502.11375</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.11375">pdf</a>, <a href="https://arxiv.org/format/2502.11375">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Robot Deformable Object Manipulation via NMPC-generated Demonstrations in Deep Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wang%2C+H">Haoyuan Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Dong%2C+Z">Zihao Dong</a>, <a 
href="/search/cs?searchtype=author&amp;query=Lei%2C+H">Hongliang Lei</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Z">Zejia Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+W">Weizhuang Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Luo%2C+W">Wei Luo</a>, <a href="/search/cs?searchtype=author&amp;query=Wan%2C+W">Weiwei Wan</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+J">Jian Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.11375v1-abstract-short" style="display: inline;"> In this work, we conducted research on deformable object manipulation by robots based on demonstration-enhanced reinforcement learning (RL). To improve the learning efficiency of RL, we enhanced the utilization of demonstration data from multiple aspects and proposed the HGCR-DDPG algorithm. It uses a novel high-dimensional fuzzy approach for grasping-point selection, a refined behavior-cloning me&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11375v1-abstract-full').style.display = 'inline'; document.getElementById('2502.11375v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11375v1-abstract-full" style="display: none;"> In this work, we conducted research on deformable object manipulation by robots based on demonstration-enhanced reinforcement learning (RL). To improve the learning efficiency of RL, we enhanced the utilization of demonstration data from multiple aspects and proposed the HGCR-DDPG algorithm. It uses a novel high-dimensional fuzzy approach for grasping-point selection, a refined behavior-cloning method to enhance data-driven learning in Rainbow-DDPG, and a sequential policy-learning strategy. 
Compared to the baseline algorithm (Rainbow-DDPG), our proposed HGCR-DDPG achieved 2.01 times the global average reward and reduced the global average standard deviation to 45% of that of the baseline algorithm. To reduce the human labor cost of demonstration collection, we proposed a low-cost demonstration collection method based on Nonlinear Model Predictive Control (NMPC). Simulation experiment results show that demonstrations collected through NMPC can be used to train HGCR-DDPG, achieving comparable results to those obtained with human demonstrations. To validate the feasibility of our proposed methods in real-world environments, we conducted physical experiments involving deformable object manipulation. We manipulated fabric to perform three tasks: diagonal folding, central axis folding, and flattening. The experimental results demonstrate that our proposed method achieved success rates of 83.3%, 80%, and 100% for these three tasks, respectively, validating the effectiveness of our approach. Compared to current large-model approaches for robot manipulation, the proposed algorithm is lightweight, requires fewer computational resources, and offers task-specific customization and efficient adaptability for specific tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11375v1-abstract-full').style.display = 'none'; document.getElementById('2502.11375v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.08502">arXiv:2502.08502</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.08502">pdf</a>, <a href="https://arxiv.org/ps/2502.08502">ps</a>, <a href="https://arxiv.org/format/2502.08502">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> On the Fundamental Limits of Integrated Sensing and Communications Under Logarithmic Loss </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chen%2C+J">Jun Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Yu%2C+L">Lei Yu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Y">Yonglong Li</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+W">Wuxian Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Ge%2C+Y">Yiqun Ge</a>, <a href="/search/cs?searchtype=author&amp;query=Tong%2C+W">Wen Tong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.08502v1-abstract-short" style="display: inline;"> We study a unified information-theoretic framework for integrated sensing and communications (ISAC), applicable to both monostatic and bistatic sensing scenarios. Special attention is given to the case where the sensing receiver (Rx) is required to produce a &#34;soft&#34; estimate of the state sequence, with logarithmic loss serving as the performance metric. 
We derive lower and upper bounds on the capac&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08502v1-abstract-full').style.display = 'inline'; document.getElementById('2502.08502v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.08502v1-abstract-full" style="display: none;"> We study a unified information-theoretic framework for integrated sensing and communications (ISAC), applicable to both monostatic and bistatic sensing scenarios. Special attention is given to the case where the sensing receiver (Rx) is required to produce a &#34;soft&#34; estimate of the state sequence, with logarithmic loss serving as the performance metric. We derive lower and upper bounds on the capacity-distortion function, which delineates the fundamental tradeoff between communication rate and sensing distortion. These bounds coincide when the channel between the ISAC transmitter (Tx) and the communication Rx is degraded with respect to the channel between the ISAC Tx and the sensing Rx, or vice versa. Furthermore, we provide a complete characterization of the capacity-distortion function for an ISAC system that simultaneously transmits information over a binary-symmetric channel and senses additive Bernoulli states through another binary-symmetric channel. The Gaussian counterpart of this problem is also explored, which, together with a state-splitting trick, fully determines the capacity-distortion-power function under the squared error distortion measure. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08502v1-abstract-full').style.display = 'none'; document.getElementById('2502.08502v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.04510">arXiv:2502.04510</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.04510">pdf</a>, <a href="https://arxiv.org/format/2502.04510">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Heterogeneous Swarms: Jointly Optimizing Model Roles and Weights for Multi-LLM Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Feng%2C+S">Shangbin Feng</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Z">Zifeng Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Goyal%2C+P">Palash Goyal</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Yike Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+W">Weijia Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Xia%2C+H">Huang Xia</a>, <a href="/search/cs?searchtype=author&amp;query=Palangi%2C+H">Hamid Palangi</a>, <a href="/search/cs?searchtype=author&amp;query=Zettlemoyer%2C+L">Luke Zettlemoyer</a>, <a href="/search/cs?searchtype=author&amp;query=Tsvetkov%2C+Y">Yulia Tsvetkov</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+C">Chen-Yu Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Pfister%2C+T">Tomas Pfister</a> </p> <p class="abstract 
mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.04510v1-abstract-short" style="display: inline;"> We propose Heterogeneous Swarms, an algorithm to design multi-LLM systems by jointly optimizing model roles and weights. We represent multi-LLM systems as directed acyclic graphs (DAGs) of LLMs with topological message passing for collaborative generation. Given a pool of LLM experts and a utility function, Heterogeneous Swarms employs two iterative steps: role-step and weight-step. For role-step,&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04510v1-abstract-full').style.display = 'inline'; document.getElementById('2502.04510v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.04510v1-abstract-full" style="display: none;"> We propose Heterogeneous Swarms, an algorithm to design multi-LLM systems by jointly optimizing model roles and weights. We represent multi-LLM systems as directed acyclic graphs (DAGs) of LLMs with topological message passing for collaborative generation. Given a pool of LLM experts and a utility function, Heterogeneous Swarms employs two iterative steps: role-step and weight-step. For role-step, we interpret model roles as learning a DAG that specifies the flow of inputs and outputs between LLMs. Starting from a swarm of random continuous adjacency matrices, we decode them into discrete DAGs, call the LLMs in topological order, evaluate on the utility function (e.g. accuracy on a task), and optimize the adjacency matrices with particle swarm optimization based on the utility score. For weight-step, we assess the contribution of individual LLMs in the multi-LLM systems and optimize model weights with swarm intelligence. 
We propose JFK-score to quantify the individual contribution of each LLM in the best-found DAG of the role-step, then optimize model weights with particle swarm optimization based on the JFK-score. Experiments demonstrate that Heterogeneous Swarms outperforms 15 role- and/or weight-based baselines by 18.5% on average across 12 tasks. Further analysis reveals that Heterogeneous Swarms discovers multi-LLM systems with heterogeneous model roles and substantial collaborative gains, and benefits from the diversity of language models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04510v1-abstract-full').style.display = 'none'; document.getElementById('2502.04510v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.04506">arXiv:2502.04506</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.04506">pdf</a>, <a href="https://arxiv.org/format/2502.04506">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> When One LLM Drools, Multi-LLM Collaboration Rules </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Feng%2C+S">Shangbin Feng</a>, <a href="/search/cs?searchtype=author&amp;query=Ding%2C+W">Wenxuan Ding</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+A">Alisa Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Z">Zifeng Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+W">Weijia Shi</a>, <a 
href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Yike Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Shen%2C+Z">Zejiang Shen</a>, <a href="/search/cs?searchtype=author&amp;query=Han%2C+X">Xiaochuang Han</a>, <a href="/search/cs?searchtype=author&amp;query=Lang%2C+H">Hunter Lang</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+C">Chen-Yu Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Pfister%2C+T">Tomas Pfister</a>, <a href="/search/cs?searchtype=author&amp;query=Choi%2C+Y">Yejin Choi</a>, <a href="/search/cs?searchtype=author&amp;query=Tsvetkov%2C+Y">Yulia Tsvetkov</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.04506v1-abstract-short" style="display: inline;"> This position paper argues that in many realistic (i.e., complex, contextualized, subjective) scenarios, one LLM is not enough to produce a reliable output. We challenge the status quo of relying solely on a single general-purpose LLM and argue for multi-LLM collaboration to better represent the extensive diversity of data, skills, and people. We first posit that a single LLM underrepresents real-&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04506v1-abstract-full').style.display = 'inline'; document.getElementById('2502.04506v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.04506v1-abstract-full" style="display: none;"> This position paper argues that in many realistic (i.e., complex, contextualized, subjective) scenarios, one LLM is not enough to produce a reliable output. We challenge the status quo of relying solely on a single general-purpose LLM and argue for multi-LLM collaboration to better represent the extensive diversity of data, skills, and people. 
We first posit that a single LLM underrepresents real-world data distributions, heterogeneous skills, and pluralistic populations, and that such representation gaps cannot be trivially patched by further training a single LLM. We then organize existing multi-LLM collaboration methods into a hierarchy, based on the level of access and information exchange, ranging from API-level, text-level, logit-level, to weight-level collaboration. Based on these methods, we highlight how multi-LLM collaboration addresses challenges that a single LLM struggles with, such as reliability, democratization, and pluralism. Finally, we identify the limitations of existing multi-LLM methods and motivate future work. We envision multi-LLM collaboration as an essential path toward compositional intelligence and collaborative AI development. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04506v1-abstract-full').style.display = 'none'; document.getElementById('2502.04506v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00955">arXiv:2502.00955</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.00955">pdf</a>, <a href="https://arxiv.org/format/2502.00955">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Efficient Multi-Agent System Training with Data Influence-Oriented Tree Search </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Shi%2C+W">Wentao Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Yu%2C+Z">Zichun Yu</a>, <a href="/search/cs?searchtype=author&amp;query=Feng%2C+F">Fuli Feng</a>, <a href="/search/cs?searchtype=author&amp;query=He%2C+X">Xiangnan He</a>, <a href="/search/cs?searchtype=author&amp;query=Xiong%2C+C">Chenyan Xiong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.00955v1-abstract-short" style="display: inline;"> Monte Carlo Tree Search (MCTS) based methods provide promising approaches for generating synthetic data to enhance the self-training of Large Language Model (LLM) based multi-agent systems (MAS). These methods leverage Q-values to estimate individual agent contributions. 
However, relying solely on Q-values to identify informative data may misalign with the data synthesis objective, as the focus sh&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00955v1-abstract-full').style.display = 'inline'; document.getElementById('2502.00955v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.00955v1-abstract-full" style="display: none;"> Monte Carlo Tree Search (MCTS) based methods provide promising approaches for generating synthetic data to enhance the self-training of Large Language Model (LLM) based multi-agent systems (MAS). These methods leverage Q-values to estimate individual agent contributions. However, relying solely on Q-values to identify informative data may misalign with the data synthesis objective, as the focus should be on selecting data that best enhances model training. To address this discrepancy, we propose Data Influence-oriented Tree Search (DITS), a novel framework that incorporates influence scores to guide both tree search and data selection. By leveraging influence scores, we effectively identify the most impactful data for system improvement, thereby enhancing model performance. Furthermore, we derive influence score estimation methods tailored for non-differentiable metrics, significantly reducing computational overhead by utilizing inference computations. Extensive experiments on eight multi-agent datasets demonstrate the robustness and effectiveness of the proposed methods. Notably, our findings reveal that allocating more inference resources to estimate influence scores, rather than Q-values, during data synthesis can more effectively and efficiently enhance model training. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00955v1-abstract-full').style.display = 'none'; document.getElementById('2502.00955v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.19393">arXiv:2501.19393</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.19393">pdf</a>, <a href="https://arxiv.org/format/2501.19393">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> s1: Simple test-time scaling </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Muennighoff%2C+N">Niklas Muennighoff</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+Z">Zitong Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+W">Weijia Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+X+L">Xiang Lisa Li</a>, <a href="/search/cs?searchtype=author&amp;query=Fei-Fei%2C+L">Li Fei-Fei</a>, <a href="/search/cs?searchtype=author&amp;query=Hajishirzi%2C+H">Hannaneh Hajishirzi</a>, <a href="/search/cs?searchtype=author&amp;query=Zettlemoyer%2C+L">Luke Zettlemoyer</a>, <a href="/search/cs?searchtype=author&amp;query=Liang%2C+P">Percy Liang</a>, <a href="/search/cs?searchtype=author&amp;query=Cand%C3%A8s%2C+E">Emmanuel 
Candès</a>, <a href="/search/cs?searchtype=author&amp;query=Hashimoto%2C+T">Tatsunori Hashimoto</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.19393v2-abstract-short" style="display: inline;"> Test-time scaling is a promising new approach to language modeling that uses extra test-time compute to improve performance. Recently, OpenAI&#39;s o1 model showed this capability but did not publicly share its methodology, leading to many replication efforts. We seek the simplest approach to achieve test-time scaling and strong reasoning performance. First, we curate a small dataset s1K of 1,000 ques&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.19393v2-abstract-full').style.display = 'inline'; document.getElementById('2501.19393v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.19393v2-abstract-full" style="display: none;"> Test-time scaling is a promising new approach to language modeling that uses extra test-time compute to improve performance. Recently, OpenAI&#39;s o1 model showed this capability but did not publicly share its methodology, leading to many replication efforts. We seek the simplest approach to achieve test-time scaling and strong reasoning performance. First, we curate a small dataset s1K of 1,000 questions paired with reasoning traces relying on three criteria we validate through ablations: difficulty, diversity, and quality. Second, we develop budget forcing to control test-time compute by forcefully terminating the model&#39;s thinking process or lengthening it by appending &#34;Wait&#34; multiple times to the model&#39;s generation when it tries to end. This can lead the model to double-check its answer, often fixing incorrect reasoning steps. 
After supervised finetuning the Qwen2.5-32B-Instruct language model on s1K and equipping it with budget forcing, our model s1-32B exceeds o1-preview on competition math questions by up to 27% (MATH and AIME24). Further, scaling s1-32B with budget forcing allows extrapolating beyond its performance without test-time intervention: from 50% to 57% on AIME24. Our model, data, and code are open-source at https://github.com/simplescaling/s1 <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.19393v2-abstract-full').style.display = 'none'; document.getElementById('2501.19393v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 31 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">45 pages (9 main), 10 figures, 14 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.17851">arXiv:2501.17851</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.17851">pdf</a>, <a href="https://arxiv.org/format/2501.17851">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> UGSim: Autonomous Buoyancy-Driven Underwater Glider Simulator with LQR Control Strategy and Recursive Guidance System </p> <p class="authors"> <span class="search-hit">Authors:</span> <a 
href="/search/cs?searchtype=author&amp;query=Xu%2C+Z">Zhizun Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+Y">Yang Song</a>, <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+J">Jiabao Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+W">Weichao Shi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.17851v1-abstract-short" style="display: inline;"> This paper presents the UGSim, a simulator for buoyancy-driven gliders, with a LQR control strategy, and a recursive guidance system. Building on the top of the DAVE and the UUVsim, it is designed to address unique challenges that come from the complex hydrodynamic and hydrostatic impacts on buoyancy-driven gliders, which conventional robotics simulators can&#39;t deal with. Since distinguishing featu&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.17851v1-abstract-full').style.display = 'inline'; document.getElementById('2501.17851v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.17851v1-abstract-full" style="display: none;"> This paper presents the UGSim, a simulator for buoyancy-driven gliders, with a LQR control strategy, and a recursive guidance system. Building on the top of the DAVE and the UUVsim, it is designed to address unique challenges that come from the complex hydrodynamic and hydrostatic impacts on buoyancy-driven gliders, which conventional robotics simulators can&#39;t deal with. Since distinguishing features of the class of vehicles, general controllers and guidance systems developed for underwater robotics are infeasible. The simulator is provided to accelerate the development and the evaluation of algorithms that would otherwise require expensive and time-consuming operations at sea. 
It consists of a basic kinetic module, a LQR control module and a recursive guidance module, which allows the user to concentrate on the single problem rather than the whole robotics system and the software infrastructure. We demonstrate the usage of the simulator through an example, loading the configuration of the buoyancy-driven glider named Petrel-II, presenting its dynamics simulation, performances of the control strategy and the guidance system. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.17851v1-abstract-full').style.display = 'none'; document.getElementById('2501.17851v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.16081">arXiv:2501.16081</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.16081">pdf</a>, <a href="https://arxiv.org/ps/2501.16081">ps</a>, <a href="https://arxiv.org/format/2501.16081">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TSP.2025.3536023">10.1109/TSP.2025.3536023 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Combating Interference for Over-the-Air Federated Learning: A Statistical Approach via RIS </p> 
<p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Shi%2C+W">Wei Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Yao%2C+J">Jiacheng Yao</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+W">Wei Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+J">Jindan Xu</a>, <a href="/search/cs?searchtype=author&amp;query=You%2C+X">Xiaohu You</a>, <a href="/search/cs?searchtype=author&amp;query=Eldar%2C+Y+C">Yonina C. Eldar</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+C">Chunming Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.16081v1-abstract-short" style="display: inline;"> Over-the-air computation (AirComp) integrates analog communication with task-oriented computation, serving as a key enabling technique for communication-efficient federated learning (FL) over wireless networks. However, owing to its analog characteristics, AirComp-enabled FL (AirFL) is vulnerable to both unintentional and intentional interference. In this paper, we aim to attain robustness in AirC&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.16081v1-abstract-full').style.display = 'inline'; document.getElementById('2501.16081v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.16081v1-abstract-full" style="display: none;"> Over-the-air computation (AirComp) integrates analog communication with task-oriented computation, serving as a key enabling technique for communication-efficient federated learning (FL) over wireless networks. However, owing to its analog characteristics, AirComp-enabled FL (AirFL) is vulnerable to both unintentional and intentional interference. 
In this paper, we aim to attain robustness in AirComp aggregation against interference via reconfigurable intelligent surface (RIS) technology to artificially reconstruct wireless environments. Concretely, we establish performance objectives tailored for interference suppression in wireless FL systems, aiming to achieve unbiased gradient estimation and reduce its mean square error (MSE). Oriented at these objectives, we introduce the concept of phase-manipulated favorable propagation and channel hardening for AirFL, which relies on the adjustment of RIS phase shifts to realize statistical interference elimination and reduce the error variance of gradient estimation. Building upon this concept, we propose two robust aggregation schemes of power control and RIS phase shifts design, both ensuring unbiased gradient estimation in the presence of interference. Theoretical analysis of the MSE and FL convergence affirms the anti-interference capability of the proposed schemes. It is observed that computation and interference errors diminish by an order of $\mathcal{O}\left(\frac{1}{N}\right)$ where $N$ is the number of RIS elements, and the ideal convergence rate without interference can be asymptotically achieved by increasing $N$. Numerical results confirm the analytical results and validate the superior performance of the proposed schemes over existing baselines. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.16081v1-abstract-full').style.display = 'none'; document.getElementById('2501.16081v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by IEEE Transactions on Signal Processing</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.15278">arXiv:2501.15278</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.15278">pdf</a>, <a href="https://arxiv.org/format/2501.15278">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> PIP: Perturbation-based Iterative Pruning for Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cao%2C+Y">Yi Cao</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+W">Wei-Jie Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Shen%2C+Y">Yucheng Shen</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+W">Weijie Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Chan%2C+C">Chi-Min Chan</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+J">Jiajie Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.15278v1-abstract-short" style="display: inline;"> The rapid increase in the parameter counts of Large Language Models (LLMs), reaching billions or even trillions, presents significant challenges for their practical deployment, particularly in resource-constrained environments. 
To ease this issue, we propose PIP (Perturbation-based Iterative Pruning), a novel double-view structured pruning method to optimize LLMs, which combines information from t&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15278v1-abstract-full').style.display = 'inline'; document.getElementById('2501.15278v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.15278v1-abstract-full" style="display: none;"> The rapid increase in the parameter counts of Large Language Models (LLMs), reaching billions or even trillions, presents significant challenges for their practical deployment, particularly in resource-constrained environments. To ease this issue, we propose PIP (Perturbation-based Iterative Pruning), a novel double-view structured pruning method to optimize LLMs, which combines information from two different views: the unperturbed view and the perturbed view. With the calculation of gradient differences, PIP iteratively prunes those that struggle to distinguish between these two views. Our experiments show that PIP reduces the parameter count by approximately 20% while retaining over 85% of the original model&#39;s accuracy across varied benchmarks. In some cases, the performance of the pruned model is within 5% of the unpruned version, demonstrating PIP&#39;s ability to preserve key aspects of model effectiveness. Moreover, PIP consistently outperforms existing state-of-the-art (SOTA) structured pruning methods, establishing it as a leading technique for optimizing LLMs in environments with constrained resources. Our code is available at: https://github.com/caoyiiiiii/PIP. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15278v1-abstract-full').style.display = 'none'; document.getElementById('2501.15278v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.14304">arXiv:2501.14304</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.14304">pdf</a>, <a href="https://arxiv.org/format/2501.14304">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> MASTER: A Multi-Agent System with LLM Specialized MCTS </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Gan%2C+B">Bingzheng Gan</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+Y">Yufan Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+T">Tianyi Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+J">Jing Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Y">Yusu Li</a>, <a href="/search/cs?searchtype=author&amp;query=Teo%2C+S+X">Shu Xian Teo</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+C">Changwang Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+W">Wei Shi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.14304v2-abstract-short" style="display: inline;"> Large Language Models (LLM) are increasingly being explored for problem-solving tasks. 
However, their strategic planning capability is often viewed with skepticism. Recent studies have incorporated the Monte Carlo Tree Search (MCTS) algorithm to augment the planning capacity of LLM. Despite its potential, MCTS relies on extensive sampling simulations to approximate the true reward distribution, wh&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14304v2-abstract-full').style.display = 'inline'; document.getElementById('2501.14304v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.14304v2-abstract-full" style="display: none;"> Large Language Models (LLM) are increasingly being explored for problem-solving tasks. However, their strategic planning capability is often viewed with skepticism. Recent studies have incorporated the Monte Carlo Tree Search (MCTS) algorithm to augment the planning capacity of LLM. Despite its potential, MCTS relies on extensive sampling simulations to approximate the true reward distribution, which leads to two primary issues. Firstly, MCTS is effective for tasks like the Game of Go, where simulation results can yield objective rewards (e.g., 1 for a win and 0 for a loss). However, for tasks such as question answering, the result of a simulation is the answer to the question, which cannot yield an objective reward without the ground truth. Secondly, obtaining statistically significant reward estimations typically requires a sample size exceeding 30 simulations, resulting in excessive token usage and time consumption. To address these challenges, we present the Multi-Agent System with Tactical Execution and Reasoning using LLM Specialized MCTS (MASTER), a novel framework that coordinates agent recruitment and communication through LLM specialized MCTS. This system autonomously adjusts the number of agents based on task complexity and ensures focused communication among them. 
Comprehensive experiments across various tasks demonstrate the effectiveness of our proposed framework. It achieves 76% accuracy on HotpotQA and 80% on WebShop, setting new state-of-the-art performance on these datasets. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14304v2-abstract-full').style.display = 'none'; document.getElementById('2501.14304v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by main NAACL 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.14249">arXiv:2501.14249</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.14249">pdf</a>, <a href="https://arxiv.org/format/2501.14249">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Humanity&#39;s Last Exam </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Phan%2C+L">Long Phan</a>, <a href="/search/cs?searchtype=author&amp;query=Gatti%2C+A">Alice Gatti</a>, <a href="/search/cs?searchtype=author&amp;query=Han%2C+Z">Ziwen Han</a>, <a 
href="/search/cs?searchtype=author&amp;query=Li%2C+N">Nathaniel Li</a>, <a href="/search/cs?searchtype=author&amp;query=Hu%2C+J">Josephina Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+H">Hugh Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+C+B+C">Chen Bo Calvin Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Shaaban%2C+M">Mohamed Shaaban</a>, <a href="/search/cs?searchtype=author&amp;query=Ling%2C+J">John Ling</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+S">Sean Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Choi%2C+M">Michael Choi</a>, <a href="/search/cs?searchtype=author&amp;query=Agrawal%2C+A">Anish Agrawal</a>, <a href="/search/cs?searchtype=author&amp;query=Chopra%2C+A">Arnav Chopra</a>, <a href="/search/cs?searchtype=author&amp;query=Khoja%2C+A">Adam Khoja</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+R">Ryan Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Ren%2C+R">Richard Ren</a>, <a href="/search/cs?searchtype=author&amp;query=Hausenloy%2C+J">Jason Hausenloy</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+O">Oliver Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Mazeika%2C+M">Mantas Mazeika</a>, <a href="/search/cs?searchtype=author&amp;query=Nguyen%2C+T">Tung Nguyen</a>, <a href="/search/cs?searchtype=author&amp;query=Anderson%2C+D">Daron Anderson</a>, <a href="/search/cs?searchtype=author&amp;query=Shah%2C+I+A">Imad Ali Shah</a>, <a href="/search/cs?searchtype=author&amp;query=Doroshenko%2C+M">Mikhail Doroshenko</a>, <a href="/search/cs?searchtype=author&amp;query=Stokes%2C+A+C">Alun Cennyth Stokes</a>, <a href="/search/cs?searchtype=author&amp;query=Mahmood%2C+M">Mobeen Mahmood</a> , et al. 
(709 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.14249v5-abstract-short" style="display: inline;"> Benchmarks are important tools for tracking the rapid advancements in large language model (LLM) capabilities. However, benchmarks are not keeping pace in difficulty: LLMs now achieve over 90\% accuracy on popular benchmarks like MMLU, limiting informed measurement of state-of-the-art LLM capabilities. In response, we introduce Humanity&#39;s Last Exam (HLE), a multi-modal benchmark at the frontier of&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14249v5-abstract-full').style.display = 'inline'; document.getElementById('2501.14249v5-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.14249v5-abstract-full" style="display: none;"> Benchmarks are important tools for tracking the rapid advancements in large language model (LLM) capabilities. However, benchmarks are not keeping pace in difficulty: LLMs now achieve over 90\% accuracy on popular benchmarks like MMLU, limiting informed measurement of state-of-the-art LLM capabilities. In response, we introduce Humanity&#39;s Last Exam (HLE), a multi-modal benchmark at the frontier of human knowledge, designed to be the final closed-ended academic benchmark of its kind with broad subject coverage. HLE consists of 2,700 questions across dozens of subjects, including mathematics, humanities, and the natural sciences. HLE is developed globally by subject-matter experts and consists of multiple-choice and short-answer questions suitable for automated grading. Each question has a known solution that is unambiguous and easily verifiable, but cannot be quickly answered via internet retrieval. 
State-of-the-art LLMs demonstrate low accuracy and calibration on HLE, highlighting a significant gap between current LLM capabilities and the expert human frontier on closed-ended academic questions. To inform research and policymaking upon a clear understanding of model capabilities, we publicly release HLE at https://lastexam.ai. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14249v5-abstract-full').style.display = 'none'; document.getElementById('2501.14249v5-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">27 pages, 6 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.12432">arXiv:2501.12432</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.12432">pdf</a>, <a href="https://arxiv.org/format/2501.12432">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Divide-Then-Aggregate: An Efficient Tool Learning Method via Parallel Tool Invocation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a 
href="/search/cs?searchtype=author&amp;query=Zhu%2C+D">Dongsheng Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+W">Weixian Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+Z">Zhengliang Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Ren%2C+Z">Zhaochun Ren</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+S">Shuaiqiang Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Yan%2C+L">Lingyong Yan</a>, <a href="/search/cs?searchtype=author&amp;query=Yin%2C+D">Dawei Yin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.12432v1-abstract-short" style="display: inline;"> Although current Large Language Models (LLMs) exhibit impressive capabilities, performing complex real-world tasks still requires tool learning. Mainstream methods, such as CoT/ReAct, rely on step-by-step tool invocation to interact with external environments, but they are limited in perceptual scope and lack adequate task-planning capability. To address these limitations, other studies introduce&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.12432v1-abstract-full').style.display = 'inline'; document.getElementById('2501.12432v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.12432v1-abstract-full" style="display: none;"> Although current Large Language Models (LLMs) exhibit impressive capabilities, performing complex real-world tasks still requires tool learning. Mainstream methods, such as CoT/ReAct, rely on step-by-step tool invocation to interact with external environments, but they are limited in perceptual scope and lack adequate task-planning capability. 
To address these limitations, other studies introduce the Depth-First Search-based Decision Tree (DFSDT), which still suffers from the high computational cost. In this paper, we introduce a novel parallel tool invocation paradigm, DTA-Llama (Divide-Then-Aggregate Llama). First, we transform traditional tree-based tool search paths into Directed Acyclic Graph (DAG) structure, generating a high-quality parallel tool invocation dataset. The DTA-Llama is then trained on the dataset to learn to iteratively divide the current task into several parallel tool invocation sub-tasks and aggregate the invocation results to decide the next actions. Furthermore, we introduce an efficient inference framework inspired by the Process/Threads mechanism when applying the DTA-Llama to practical tasks. Experimental results show that our approach substantially enhances task performance while reducing token consumption and inference time. Llama2-7B, using our method, is comparable to the official parallel function calling method of GPT-3.5. The relevant code, dataset, and model weights are available at https://corn0205.github.io/ <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.12432v1-abstract-full').style.display = 'none'; document.getElementById('2501.12432v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.11848">arXiv:2501.11848</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.11848">pdf</a>, <a href="https://arxiv.org/format/2501.11848">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> FedMUA: Exploring the Vulnerabilities of Federated Learning to Malicious Unlearning Attacks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chen%2C+J">Jian Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Lin%2C+Z">Zehui Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Lin%2C+W">Wanyu Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+W">Wenlong Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Yin%2C+X">Xiaoyan Yin</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+D">Di Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.11848v1-abstract-short" style="display: inline;"> Recently, the practical needs of ``the right to be forgotten&#39;&#39; in federated learning gave birth to a paradigm known as federated unlearning, which enables the server to forget personal data upon the client&#39;s removal request. 
Existing studies on federated unlearning have primarily focused on efficiently eliminating the influence of requested data from the client&#39;s model without retraining from scra&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.11848v1-abstract-full').style.display = 'inline'; document.getElementById('2501.11848v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.11848v1-abstract-full" style="display: none;"> Recently, the practical needs of ``the right to be forgotten&#39;&#39; in federated learning gave birth to a paradigm known as federated unlearning, which enables the server to forget personal data upon the client&#39;s removal request. Existing studies on federated unlearning have primarily focused on efficiently eliminating the influence of requested data from the client&#39;s model without retraining from scratch, however, they have rarely doubted the reliability of the global model posed by the discrepancy between its prediction performance before and after unlearning. To bridge this gap, we take the first step by introducing a novel malicious unlearning attack dubbed FedMUA, aiming to unveil potential vulnerabilities emerging from federated learning during the unlearning process. The crux of FedMUA is to mislead the global model into unlearning more information associated with the influential samples for the target sample than anticipated, thus inducing adverse effects on target samples from other clients. To achieve this, we design a novel two-step method, known as Influential Sample Identification and Malicious Unlearning Generation, to identify and subsequently generate malicious feature unlearning requests within the influential samples. 
By doing so, we can significantly alter the predictions pertaining to the target sample by initiating the malicious feature unlearning requests, leading to the deliberate manipulation for the user adversely. Additionally, we design a new defense mechanism that is highly resilient against malicious unlearning attacks. Extensive experiments on three realistic datasets reveal that FedMUA effectively induces misclassification on target samples and can achieve an 80% attack success rate by triggering only 0.3% malicious unlearning requests. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.11848v1-abstract-full').style.display = 'none'; document.getElementById('2501.11848v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.05591">arXiv:2501.05591</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.05591">pdf</a>, <a href="https://arxiv.org/format/2501.05591">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Session-Level Dynamic Ad Load Optimization using Offline Robust Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Liu%2C+T">Tao Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+Q">Qi Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+W">Wei Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Hua%2C+Z">Zhigang Hua</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+S">Shuang Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.05591v1-abstract-short" style="display: inline;"> Session-level dynamic ad load optimization aims to personalize the density and types of delivered advertisements in real time during a user&#39;s online session by dynamically balancing user experience quality and ad monetization. Traditional causal learning-based approaches struggle with key technical challenges, especially in handling confounding bias and distribution shifts. 
In this paper, we devel&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.05591v1-abstract-full').style.display = 'inline'; document.getElementById('2501.05591v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.05591v1-abstract-full" style="display: none;"> Session-level dynamic ad load optimization aims to personalize the density and types of delivered advertisements in real time during a user&#39;s online session by dynamically balancing user experience quality and ad monetization. Traditional causal learning-based approaches struggle with key technical challenges, especially in handling confounding bias and distribution shifts. In this paper, we develop an offline deep Q-network (DQN)-based framework that effectively mitigates confounding bias in dynamic systems and demonstrates more than 80% offline gains compared to the best causal learning-based production baseline. Moreover, to improve the framework&#39;s robustness against unanticipated distribution shifts, we further enhance our framework with a novel offline robust dueling DQN approach. This approach achieves more stable rewards on multiple OpenAI-Gym datasets as perturbations increase, and provides an additional 5% offline gains on real-world ad delivery data. Deployed across multiple production systems, our approach has achieved outsized topline gains. Post-launch online A/B tests have shown double-digit improvements in the engagement-ad score trade-off efficiency, significantly enhancing our platform&#39;s capability to serve both consumers and advertisers. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.05591v1-abstract-full').style.display = 'none'; document.getElementById('2501.05591v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Will appear in KDD 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.00383">arXiv:2501.00383</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.00383">pdf</a>, <a href="https://arxiv.org/format/2501.00383">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Proactive Conversational Agents with Inner Thoughts </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Liu%2C+X+B">Xingyu Bruce Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Fang%2C+S">Shitao Fang</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+W">Weiyan Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+C">Chien-Sheng Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Igarashi%2C+T">Takeo Igarashi</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+X+A">Xiang Anthony Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short 
has-text-grey-dark mathjax" id="2501.00383v2-abstract-short" style="display: inline;"> One of the long-standing aspirations in conversational AI is to allow them to autonomously take initiatives in conversations, i.e., being proactive. This is especially challenging for multi-party conversations. Prior NLP research focused mainly on predicting the next speaker from contexts like preceding conversations. In this paper, we demonstrate the limitations of such methods and rethink what i&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.00383v2-abstract-full').style.display = 'inline'; document.getElementById('2501.00383v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.00383v2-abstract-full" style="display: none;"> One of the long-standing aspirations in conversational AI is to allow them to autonomously take initiatives in conversations, i.e., being proactive. This is especially challenging for multi-party conversations. Prior NLP research focused mainly on predicting the next speaker from contexts like preceding conversations. In this paper, we demonstrate the limitations of such methods and rethink what it means for AI to be proactive in multi-party, human-AI conversations. We propose that just like humans, rather than merely reacting to turn-taking cues, a proactive AI formulates its own inner thoughts during a conversation, and seeks the right moment to contribute. Through a formative study with 24 participants and inspiration from linguistics and cognitive psychology, we introduce the Inner Thoughts framework. Our framework equips AI with a continuous, covert train of thoughts in parallel to the overt communication process, which enables it to proactively engage by modeling its intrinsic motivation to express these thoughts. We instantiated this framework into two real-time systems: an AI playground web app and a chatbot. 
Through a technical evaluation and user studies with human participants, our framework significantly surpasses existing baselines on aspects like anthropomorphism, coherence, intelligence, and turn-taking appropriateness. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.00383v2-abstract-full').style.display = 'none'; document.getElementById('2501.00383v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 31 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.17872">arXiv:2412.17872</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.17872">pdf</a>, <a href="https://arxiv.org/format/2412.17872">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Joint Knowledge Editing for Information Enrichment and Probability Promotion </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Shi%2C+W">Wenhang Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Y">Yiren Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Bian%2C+S">Shuqing Bian</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+X">Xinyi Zhang</a>, <a 
href="/search/cs?searchtype=author&amp;query=Zhao%2C+Z">Zhe Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Hu%2C+P">Pengfei Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Lu%2C+W">Wei Lu</a>, <a href="/search/cs?searchtype=author&amp;query=Du%2C+X">Xiaoyong Du</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.17872v1-abstract-short" style="display: inline;"> Knowledge stored in large language models requires timely updates to reflect the dynamic nature of real-world information. To update the knowledge, most knowledge editing methods focus on the low layers, since recent probes into the knowledge recall process reveal that the answer information is enriched in low layers. However, these probes only and could only reveal critical recall stages for the&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.17872v1-abstract-full').style.display = 'inline'; document.getElementById('2412.17872v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.17872v1-abstract-full" style="display: none;"> Knowledge stored in large language models requires timely updates to reflect the dynamic nature of real-world information. To update the knowledge, most knowledge editing methods focus on the low layers, since recent probes into the knowledge recall process reveal that the answer information is enriched in low layers. However, these probes only and could only reveal critical recall stages for the original answers, while the goal of editing is to rectify model&#39;s prediction for the target answers. This inconsistency indicates that both the probe approaches and the associated editing methods are deficient. 
To mitigate the inconsistency and identify critical editing regions, we propose a contrast-based probe approach, and locate two crucial stages where the model behavior diverges between the original and target answers: Information Enrichment in low layers and Probability Promotion in high layers. Building upon the insights, we develop the Joint knowledge Editing for information Enrichment and probability Promotion (JEEP) method, which jointly edits both the low and high layers to modify the two critical recall stages. Considering the mutual interference and growing forgetting due to dual modifications, JEEP is designed to ensure that updates to distinct regions share the same objectives and are complementary. We rigorously evaluate JEEP by editing up to thousands of facts on various models, i.e., GPT-J (6B) and LLaMA (7B), and addressing diverse editing objectives, i.e., adding factual and counterfactual knowledge. In all tested scenarios, JEEP achieves best performances, validating the effectiveness of the revealings of our probe approach and the designs of our editing method. Our code and data are available at https://github.com/Eric8932/JEEP. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.17872v1-abstract-full').style.display = 'none'; document.getElementById('2412.17872v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.15188">arXiv:2412.15188</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.15188">pdf</a>, <a href="https://arxiv.org/format/2412.15188">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> LMFusion: Adapting Pretrained Language Models for Multimodal Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Shi%2C+W">Weijia Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Han%2C+X">Xiaochuang Han</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+C">Chunting Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Liang%2C+W">Weixin Liang</a>, <a href="/search/cs?searchtype=author&amp;query=Lin%2C+X+V">Xi Victoria Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Zettlemoyer%2C+L">Luke Zettlemoyer</a>, <a href="/search/cs?searchtype=author&amp;query=Yu%2C+L">Lili Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.15188v4-abstract-short" style="display: inline;"> We present LMFusion, a framework for empowering pretrained text-only large language models (LLMs) with multimodal generative capabilities, enabling them to understand and generate both text and images in arbitrary sequences. 
LMFusion leverages existing Llama-3&#39;s weights for processing texts autoregressively while introducing additional and parallel transformer modules for processing images with di&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.15188v4-abstract-full').style.display = 'inline'; document.getElementById('2412.15188v4-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.15188v4-abstract-full" style="display: none;"> We present LMFusion, a framework for empowering pretrained text-only large language models (LLMs) with multimodal generative capabilities, enabling them to understand and generate both text and images in arbitrary sequences. LMFusion leverages existing Llama-3&#39;s weights for processing texts autoregressively while introducing additional and parallel transformer modules for processing images with diffusion. During training, the data from each modality is routed to its dedicated modules: modality-specific feedforward layers, query-key-value projections, and normalization layers process each modality independently, while the shared self-attention layers allow interactions across text and image features. By freezing the text-specific modules and only training the image-specific modules, LMFusion preserves the language capabilities of text-only LLMs while developing strong visual understanding and generation abilities. Compared to methods that pretrain multimodal generative models from scratch, our experiments demonstrate that, LMFusion improves image understanding by 20% and image generation by 3.6% using only 50% of the FLOPs while maintaining Llama-3&#39;s language capabilities. We also demonstrate that this framework can adapt existing vision-language models with multimodal generation ability. 
Overall, this framework not only leverages existing computational investments in text-only LLMs but also enables the parallel development of language and vision capabilities, presenting a promising direction for efficient multimodal model development. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.15188v4-abstract-full').style.display = 'none'; document.getElementById('2412.15188v4-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Name change: LlamaFusion to LMFusion</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.12513">arXiv:2412.12513</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.12513">pdf</a>, <a href="https://arxiv.org/format/2412.12513">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> Generating Move Smart Contracts based on Concepts </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Karanjai%2C+R">Rabimba Karanjai</a>, <a href="/search/cs?searchtype=author&amp;query=Blackshear%2C+S">Sam Blackshear</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+L">Lei Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+W">Weidong Shi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.12513v1-abstract-short" style="display: inline;"> The growing adoption of formal verification for smart contracts has spurred the development of new verifiable languages like Move. However, the limited availability of training data for these languages hinders effective code generation by large language models (LLMs). This paper presents ConMover, a novel framework that enhances LLM-based code generation for Move by leveraging a knowledge graph of&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.12513v1-abstract-full').style.display = 'inline'; document.getElementById('2412.12513v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.12513v1-abstract-full" style="display: none;"> The growing adoption of formal verification for smart contracts has spurred the development of new verifiable languages like Move. However, the limited availability of training data for these languages hinders effective code generation by large language models (LLMs). This paper presents ConMover, a novel framework that enhances LLM-based code generation for Move by leveraging a knowledge graph of Move concepts and a small set of verified code examples. ConMover integrates concept retrieval, planning, coding, and debugging agents in an iterative process to refine generated code. Evaluations with various open-source LLMs demonstrate substantial accuracy improvements over baseline models. These results underscore ConMover&#39;s potential to address low-resource code generation challenges, bridging the gap between natural language descriptions and reliable smart contract development. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.12513v1-abstract-full').style.display = 'none'; document.getElementById('2412.12513v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.12501">arXiv:2412.12501</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.12501">pdf</a>, <a href="https://arxiv.org/format/2412.12501">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Unleashing the Potential of Model Bias for Generalized Category Discovery </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=An%2C+W">Wenbin An</a>, <a href="/search/cs?searchtype=author&amp;query=Lin%2C+H">Haonan Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Nie%2C+J">Jiahao Nie</a>, <a href="/search/cs?searchtype=author&amp;query=Tian%2C+F">Feng Tian</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+W">Wenkai Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+Y">Yaqiang Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Q">Qianying Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+P">Ping Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.12501v1-abstract-short" style="display: inline;"> Generalized Category Discovery is a significant and complex task that aims to identify both known and undefined novel categories from a set of unlabeled data, leveraging another labeled dataset containing only known categories. The primary challenges stem from model bias induced by pre-training on only known categories and the lack of precise supervision for novel ones, leading to category bias to&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.12501v1-abstract-full').style.display = 'inline'; document.getElementById('2412.12501v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.12501v1-abstract-full" style="display: none;"> Generalized Category Discovery is a significant and complex task that aims to identify both known and undefined novel categories from a set of unlabeled data, leveraging another labeled dataset containing only known categories. The primary challenges stem from model bias induced by pre-training on only known categories and the lack of precise supervision for novel ones, leading to category bias towards known categories and category confusion among different novel categories, which hinders models&#39; ability to identify novel categories effectively. To address these challenges, we propose a novel framework named Self-Debiasing Calibration (SDC). Unlike prior methods that regard model bias towards known categories as an obstacle to novel category identification, SDC provides a novel insight into unleashing the potential of the bias to facilitate novel category learning. Specifically, the output of the biased model serves two key purposes. 
First, it provides an accurate modeling of category bias, which can be utilized to measure the degree of bias and debias the output of the current training model. Second, it offers valuable insights for distinguishing different novel categories by transferring knowledge between similar categories. Based on these insights, SDC dynamically adjusts the output logits of the current training model using the output of the biased model. This approach produces less biased logits to effectively address the issue of category bias towards known categories, and generates more accurate pseudo labels for unlabeled data, thereby mitigating category confusion for novel categories. Experiments on three benchmark datasets show that SDC outperforms SOTA methods, especially in the identification of novel categories. Our code and data are available at \url{https://github.com/Lackel/SDC}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.12501v1-abstract-full').style.display = 'none'; document.getElementById('2412.12501v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by AAAI 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.12364">arXiv:2412.12364</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.12364">pdf</a>, <a href="https://arxiv.org/format/2412.12364">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3672608.3707883">10.1145/3672608.3707883 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> LogBabylon: A Unified Framework for Cross-Log File Integration and Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Karanjai%2C+R">Rabimba Karanjai</a>, <a href="/search/cs?searchtype=author&amp;query=Lu%2C+Y">Yang Lu</a>, <a href="/search/cs?searchtype=author&amp;query=Alsagheer%2C+D">Dana Alsagheer</a>, <a href="/search/cs?searchtype=author&amp;query=Kasichainula%2C+K">Keshav Kasichainula</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+L">Lei Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+W">Weidong Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+S+S">Shou-Hsuan Stephen Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" 
id="2412.12364v1-abstract-short" style="display: inline;"> Logs are critical resources that record events, activities, or messages produced by software applications, operating systems, servers, and network devices. However, consolidating the heterogeneous logs and cross-referencing them is challenging and complicated. Manually analyzing the log data is time-consuming and prone to errors. LogBabylon is a centralized log data consolidating solution that lev&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.12364v1-abstract-full').style.display = 'inline'; document.getElementById('2412.12364v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.12364v1-abstract-full" style="display: none;"> Logs are critical resources that record events, activities, or messages produced by software applications, operating systems, servers, and network devices. However, consolidating the heterogeneous logs and cross-referencing them is challenging and complicated. Manually analyzing the log data is time-consuming and prone to errors. LogBabylon is a centralized log data consolidating solution that leverages Large Language Models (LLMs) integrated with Retrieval-Augmented Generation (RAG) technology. LogBabylon interprets the log data in a human-readable way and adds insight analysis of the system performance and anomaly alerts. It provides a paramount view of the system landscape, enabling proactive management and rapid incident response. LogBabylon consolidates diverse log sources and enhances the extracted information&#39;s accuracy and relevancy. This facilitates a deeper understanding of log data, supporting more effective decision-making and operational efficiency. Furthermore, LogBabylon streamlines the log analysis process, significantly reducing the time and effort required to interpret complex datasets. 
Its capabilities extend to generating context-aware insights, offering an invaluable tool for continuous monitoring, performance optimization, and security assurance in dynamic computing environments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.12364v1-abstract-full').style.display = 'none'; document.getElementById('2412.12364v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.11807">arXiv:2412.11807</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.11807">pdf</a>, <a href="https://arxiv.org/format/2412.11807">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> PhysAug: A Physical-guided and Frequency-based Data Augmentation for Single-Domain Generalized Object Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Xu%2C+X">Xiaoran Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jiangang Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+W">Wenhui Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Ding%2C+S">Siyuan Ding</a>, <a href="/search/cs?searchtype=author&amp;query=Luo%2C+L">Luqing Luo</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+J">Jian Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.11807v2-abstract-short" style="display: inline;"> Single-Domain Generalized Object Detection~(S-DGOD) aims to train on a single source domain for robust performance across a variety of unseen target domains by taking advantage of an object detector. Existing S-DGOD approaches often rely on data augmentation strategies, including a composition of visual transformations, to enhance the detector&#39;s generalization ability. However, the absence of real&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.11807v2-abstract-full').style.display = 'inline'; document.getElementById('2412.11807v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.11807v2-abstract-full" style="display: none;"> Single-Domain Generalized Object Detection~(S-DGOD) aims to train on a single source domain for robust performance across a variety of unseen target domains by taking advantage of an object detector. Existing S-DGOD approaches often rely on data augmentation strategies, including a composition of visual transformations, to enhance the detector&#39;s generalization ability. However, the absence of real-world prior knowledge hinders data augmentation from contributing to the diversity of training data distributions. To address this issue, we propose PhysAug, a novel physical model-based non-ideal imaging condition data augmentation method, to enhance the adaptability of the S-DGOD tasks. Drawing upon the principles of atmospheric optics, we develop a universal perturbation model that serves as the foundation for our proposed PhysAug. Given that visual perturbations typically arise from the interaction of light with atmospheric particles, the image frequency spectrum is harnessed to simulate real-world variations during training. 
This approach fosters the detector to learn domain-invariant representations, thereby enhancing its ability to generalize across various settings. Without altering the network architecture or loss function, our approach significantly outperforms the state-of-the-art across various S-DGOD datasets. In particular, it achieves a substantial improvement of $7.3\%$ and $7.2\%$ over the baseline on DWD and Cityscape-C, highlighting its enhanced generalizability in real-world settings. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.11807v2-abstract-full').style.display = 'none'; document.getElementById('2412.11807v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to AAAI 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.10455">arXiv:2412.10455</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.10455">pdf</a>, <a href="https://arxiv.org/format/2412.10455">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Geometry">cs.CG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3688866.3689124">10.1145/3688866.3689124 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Geo-LLaVA: A Large Multi-Modal Model for Solving Geometry Math Problems with Meta In-Context Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Xu%2C+S">Shihao Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Luo%2C+Y">Yiyang Luo</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+W">Wei Shi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.10455v1-abstract-short" style="display: inline;"> Geometry mathematics problems pose significant challenges for large language models (LLMs) because they involve visual elements and spatial
reasoning. Current methods primarily rely on symbolic character awareness to address these problems. Considering geometry problem solving is a relatively nascent field with limited suitable datasets and currently almost no work on solid geometry problem solvin&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.10455v1-abstract-full').style.display = 'inline'; document.getElementById('2412.10455v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.10455v1-abstract-full" style="display: none;"> Geometry mathematics problems pose significant challenges for large language models (LLMs) because they involve visual elements and spatial reasoning. Current methods primarily rely on symbolic character awareness to address these problems. Considering geometry problem solving is a relatively nascent field with limited suitable datasets and currently almost no work on solid geometry problem solving, we collect a geometry question-answer dataset by sourcing geometric data from Chinese high school education websites, referred to as GeoMath. It contains solid geometry questions and answers with accurate reasoning steps as compensation for existing plane geometry datasets. Additionally, we propose a Large Multi-modal Model (LMM) framework named Geo-LLaVA, which incorporates retrieval augmentation with supervised fine-tuning (SFT) in the training stage, called meta-training, and employs in-context learning (ICL) during inference to improve performance. Our fine-tuned model with ICL attains the state-of-the-art performance of 65.25% and 42.36% on selected questions of the GeoQA dataset and GeoMath dataset respectively with proper inference steps. Notably, our model initially endows the ability to solve solid geometry problems and supports the generation of reasonable solid geometry picture descriptions and problem-solving steps. 
Our research sets the stage for further exploration of LLMs in multi-modal math problem-solving, particularly in geometry math problems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.10455v1-abstract-full').style.display = 'none'; document.getElementById('2412.10455v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.09424">arXiv:2412.09424</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.09424">pdf</a>, <a href="https://arxiv.org/format/2412.09424">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Slope Considered Online Nonlinear Trajectory Planning with Differential Energy Model for Autonomous Driving </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Tian%2C+Z">Zhaofeng Tian</a>, <a href="/search/cs?searchtype=author&amp;query=Xia%2C+L">Lichen Xia</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+W">Weisong Shi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.09424v1-abstract-short" style="display: inline;"> Achieving energy-efficient trajectory planning for autonomous driving remains a challenge due to the limitations of model-agnostic approaches. 
This study addresses this gap by introducing an online nonlinear programming trajectory optimization framework that integrates a differentiable energy model into autonomous systems. By leveraging traffic and slope profile predictions within a safety-critica&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.09424v1-abstract-full').style.display = 'inline'; document.getElementById('2412.09424v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.09424v1-abstract-full" style="display: none;"> Achieving energy-efficient trajectory planning for autonomous driving remains a challenge due to the limitations of model-agnostic approaches. This study addresses this gap by introducing an online nonlinear programming trajectory optimization framework that integrates a differentiable energy model into autonomous systems. By leveraging traffic and slope profile predictions within a safety-critical framework, the proposed method enhances fuel efficiency for both sedans and diesel trucks by 3.71\% and 7.15\%, respectively, when compared to traditional model-agnostic quadratic programming techniques. These improvements translate to a potential \$6.14 billion economic benefit for the U.S. trucking industry. This work bridges the gap between model-agnostic autonomous driving and model-aware ECO-driving, highlighting a practical pathway for integrating energy efficiency into real-time trajectory planning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.09424v1-abstract-full').style.display = 'none'; document.getElementById('2412.09424v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.08830">arXiv:2412.08830</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.08830">pdf</a>, <a href="https://arxiv.org/format/2412.08830">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> EMATO: Energy-Model-Aware Trajectory Optimization for Autonomous Driving </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Tian%2C+Z">Zhaofeng Tian</a>, <a href="/search/cs?searchtype=author&amp;query=Xia%2C+L">Lichen Xia</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+W">Weisong Shi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.08830v1-abstract-short" style="display: inline;"> Autonomous driving lacks strong proof of energy efficiency with the energy-model-agnostic trajectory planning. 
To achieve an energy consumption model-aware trajectory planning for autonomous driving, this study proposes an online nonlinear programming method that optimizes the polynomial trajectories generated by the Frenet polynomial method while considering both traffic trajectories and road slo&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.08830v1-abstract-full').style.display = 'inline'; document.getElementById('2412.08830v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.08830v1-abstract-full" style="display: none;"> Autonomous driving lacks strong proof of energy efficiency with the energy-model-agnostic trajectory planning. To achieve an energy consumption model-aware trajectory planning for autonomous driving, this study proposes an online nonlinear programming method that optimizes the polynomial trajectories generated by the Frenet polynomial method while considering both traffic trajectories and road slope prediction. This study further investigates how the energy model can be leveraged in different driving conditions to achieve higher energy efficiency. Case studies, quantitative studies, and ablation studies are conducted in a sedan and truck model to prove the effectiveness of the method. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.08830v1-abstract-full').style.display = 'none'; document.getElementById('2412.08830v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.01339">arXiv:2412.01339</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.01339">pdf</a>, <a href="https://arxiv.org/format/2412.01339">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Negative Token Merging: Image-based Adversarial Feature Guidance </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Singh%2C+J">Jaskirat Singh</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+L">Lindsey Li</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+W">Weijia Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Krishna%2C+R">Ranjay Krishna</a>, <a href="/search/cs?searchtype=author&amp;query=Choi%2C+Y">Yejin Choi</a>, <a href="/search/cs?searchtype=author&amp;query=Koh%2C+P+W">Pang Wei Koh</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+M+F">Michael F. 
Cohen</a>, <a href="/search/cs?searchtype=author&amp;query=Gould%2C+S">Stephen Gould</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+L">Liang Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Zettlemoyer%2C+L">Luke Zettlemoyer</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.01339v2-abstract-short" style="display: inline;"> Text-based adversarial guidance using a negative prompt has emerged as a widely adopted approach to steer diffusion models away from producing undesired concepts. While useful, performing adversarial guidance using text alone can be insufficient to capture complex visual concepts or avoid specific visual elements like copyrighted characters. In this paper, for the first time we explore an alternat&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.01339v2-abstract-full').style.display = 'inline'; document.getElementById('2412.01339v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.01339v2-abstract-full" style="display: none;"> Text-based adversarial guidance using a negative prompt has emerged as a widely adopted approach to steer diffusion models away from producing undesired concepts. While useful, performing adversarial guidance using text alone can be insufficient to capture complex visual concepts or avoid specific visual elements like copyrighted characters. In this paper, for the first time we explore an alternate modality in this direction by performing adversarial guidance directly using visual features from a reference image or other images in a batch. 
We introduce negative token merging (NegToMe), a simple but effective training-free approach which performs adversarial guidance through images by selectively pushing apart matching visual features between reference and generated images during the reverse diffusion process. By simply adjusting the used reference, NegToMe enables a diverse range of applications. Notably, when using other images in same batch as reference, we find that NegToMe significantly enhances output diversity (e.g., racial, gender, visual) by guiding features of each image away from others. Similarly, when used w.r.t. copyrighted reference images, NegToMe reduces visual similarity to copyrighted content by 34.57%. NegToMe is simple to implement using just few-lines of code, uses only marginally higher (&lt;4%) inference time and is compatible with different diffusion architectures, including those like Flux, which don&#39;t natively support the use of a negative prompt. Code is available at https://negtome.github.io <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.01339v2-abstract-full').style.display = 'none'; document.getElementById('2412.01339v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 2 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.00136">arXiv:2412.00136</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.00136">pdf</a>, <a href="https://arxiv.org/format/2412.00136">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> FonTS: Text Rendering with Typography and Style Controls </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Shi%2C+W">Wenda Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+Y">Yiren Song</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+D">Dengming Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+J">Jiaming Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Zou%2C+X">Xingxing Zou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.00136v1-abstract-short" style="display: inline;"> Visual text images are prevalent in various applications, requiring careful font selection and typographic choices. Recent advances in Diffusion Transformer (DiT)-based text-to-image (T2I) models show promise in automating these processes. However, these methods still face challenges such as inconsistent fonts, style variation, and limited fine-grained control, particularly at the word level. 
This&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.00136v1-abstract-full').style.display = 'inline'; document.getElementById('2412.00136v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.00136v1-abstract-full" style="display: none;"> Visual text images are prevalent in various applications, requiring careful font selection and typographic choices. Recent advances in Diffusion Transformer (DiT)-based text-to-image (T2I) models show promise in automating these processes. However, these methods still face challenges such as inconsistent fonts, style variation, and limited fine-grained control, particularly at the word level. This paper proposes a two-stage DiT-based pipeline to address these issues by enhancing controllability over typography and style in text rendering. We introduce Typography Control (TC) finetuning, an efficient parameter fine-tuning method, and enclosing typography control tokens (ETC-tokens), which enable precise word-level application of typographic features. To further enhance style control, we present a Style Control Adapter (SCA) that injects style information through image inputs independent of text prompts. Through comprehensive experiments, we demonstrate the effectiveness of our approach in achieving superior word-level typographic control, font consistency, and style consistency in Basic and Artistic Text Rendering (BTR and ATR) tasks. Our results mark a significant advancement in the precision and adaptability of T2I models, presenting new possibilities for creative applications and design-oriented tasks. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.00136v1-abstract-full').style.display = 'none'; document.getElementById('2412.00136v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.18143">arXiv:2411.18143</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.18143">pdf</a>, <a href="https://arxiv.org/format/2411.18143">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> Harnessing Large Language Models for Seed Generation in Greybox Fuzzing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Shi%2C+W">Wenxuan Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Y">Yunhang Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Xing%2C+X">Xinyu Xing</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+J">Jun Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.18143v1-abstract-short" style="display: inline;"> Greybox fuzzing has emerged as a preferred technique for discovering software bugs, striking a balance between efficiency and depth of exploration. 
While research has focused on improving fuzzing techniques, the importance of high-quality initial seeds remains critical yet often overlooked. Existing methods for seed generation are limited, especially for programs with non-standard or custom input&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.18143v1-abstract-full').style.display = 'inline'; document.getElementById('2411.18143v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.18143v1-abstract-full" style="display: none;"> Greybox fuzzing has emerged as a preferred technique for discovering software bugs, striking a balance between efficiency and depth of exploration. While research has focused on improving fuzzing techniques, the importance of high-quality initial seeds remains critical yet often overlooked. Existing methods for seed generation are limited, especially for programs with non-standard or custom input formats. Large Language Models (LLMs) have revolutionized numerous domains, showcasing unprecedented capabilities in understanding and generating complex patterns across various fields of knowledge. This paper introduces SeedMind, a novel system that leverages LLMs to boost greybox fuzzing through intelligent seed generation. Unlike previous approaches, SeedMind employs LLMs to create test case generators rather than directly producing test cases. Our approach implements an iterative, feedback-driven process that guides the LLM to progressively refine test case generation, aiming for increased code coverage depth and breadth. In developing SeedMind, we addressed key challenges including input format limitations, context window constraints, and ensuring consistent, progress-aware behavior. 
Intensive evaluations with real-world applications show that SeedMind effectively harnesses LLMs to generate high-quality test cases and facilitate fuzzing in bug finding, presenting utility comparable to human-created seeds and significantly outperforming the existing LLM-based solutions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.18143v1-abstract-full').style.display = 'none'; document.getElementById('2411.18143v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.17404">arXiv:2411.17404</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.17404">pdf</a>, <a href="https://arxiv.org/format/2411.17404">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> BPP-Search: Enhancing Tree of Thought Reasoning for Mathematical Modeling Problem Solving </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wang%2C+T">Teng Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Yu%2C+W">Wing-Yin Yu</a>, <a href="/search/cs?searchtype=author&amp;query=He%2C+Z">Zhenqi He</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Z">Zehua Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Han%2C+X">Xiongwei Han</a>, <a href="/search/cs?searchtype=author&amp;query=Gong%2C+H">Hailei Gong</a>, <a 
href="/search/cs?searchtype=author&amp;query=Wu%2C+H">Han Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+W">Wei Shi</a>, <a href="/search/cs?searchtype=author&amp;query=She%2C+R">Ruifeng She</a>, <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+F">Fangzhou Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhong%2C+T">Tao Zhong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.17404v2-abstract-short" style="display: inline;"> LLMs exhibit advanced reasoning capabilities, offering the potential to transform natural language questions into mathematical models. However, existing open-source datasets in operations research domain lack detailed annotations of the modeling process, such as variable definitions, focusing solely on objective values, which hinders reinforcement learning applications. To address this, we release&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.17404v2-abstract-full').style.display = 'inline'; document.getElementById('2411.17404v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.17404v2-abstract-full" style="display: none;"> LLMs exhibit advanced reasoning capabilities, offering the potential to transform natural language questions into mathematical models. However, existing open-source datasets in operations research domain lack detailed annotations of the modeling process, such as variable definitions, focusing solely on objective values, which hinders reinforcement learning applications. To address this, we release the StructuredOR dataset, annotated with comprehensive labels that capture the complete mathematical modeling process. 
We further propose BPP-Search, an algorithm that integrates reinforcement learning into a tree-of-thought structure using Beam search, a Process reward model, and a pairwise Preference algorithm. This approach enables efficient exploration of tree structures, avoiding exhaustive search while improving accuracy. Extensive experiments on StructuredOR, NL4OPT, and MAMO-ComplexLP datasets show that BPP-Search significantly outperforms state-of-the-art methods. In tree-based reasoning, BPP-Search excels in accuracy and efficiency, enabling faster retrieval of correct solutions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.17404v2-abstract-full').style.display = 'none'; document.getElementById('2411.17404v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.14199">arXiv:2411.14199</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.14199">pdf</a>, <a href="https://arxiv.org/format/2411.14199">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Digital Libraries">cs.DL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> OpenScholar: Synthesizing Scientific Literature with Retrieval-augmented LMs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Asai%2C+A">Akari Asai</a>, <a href="/search/cs?searchtype=author&amp;query=He%2C+J">Jacqueline He</a>, <a href="/search/cs?searchtype=author&amp;query=Shao%2C+R">Rulin Shao</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+W">Weijia Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A">Amanpreet Singh</a>, <a href="/search/cs?searchtype=author&amp;query=Chang%2C+J+C">Joseph Chee Chang</a>, <a href="/search/cs?searchtype=author&amp;query=Lo%2C+K">Kyle Lo</a>, <a href="/search/cs?searchtype=author&amp;query=Soldaini%2C+L">Luca Soldaini</a>, <a href="/search/cs?searchtype=author&amp;query=Feldman%2C+S">Sergey Feldman</a>, <a href="/search/cs?searchtype=author&amp;query=D%27arcy%2C+M">Mike D&#39;arcy</a>, <a href="/search/cs?searchtype=author&amp;query=Wadden%2C+D">David Wadden</a>, <a href="/search/cs?searchtype=author&amp;query=Latzke%2C+M">Matt 
Latzke</a>, <a href="/search/cs?searchtype=author&amp;query=Tian%2C+M">Minyang Tian</a>, <a href="/search/cs?searchtype=author&amp;query=Ji%2C+P">Pan Ji</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+S">Shengyan Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Tong%2C+H">Hao Tong</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+B">Bohao Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Xiong%2C+Y">Yanyu Xiong</a>, <a href="/search/cs?searchtype=author&amp;query=Zettlemoyer%2C+L">Luke Zettlemoyer</a>, <a href="/search/cs?searchtype=author&amp;query=Neubig%2C+G">Graham Neubig</a>, <a href="/search/cs?searchtype=author&amp;query=Weld%2C+D">Dan Weld</a>, <a href="/search/cs?searchtype=author&amp;query=Downey%2C+D">Doug Downey</a>, <a href="/search/cs?searchtype=author&amp;query=Yih%2C+W">Wen-tau Yih</a>, <a href="/search/cs?searchtype=author&amp;query=Koh%2C+P+W">Pang Wei Koh</a>, <a href="/search/cs?searchtype=author&amp;query=Hajishirzi%2C+H">Hannaneh Hajishirzi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.14199v1-abstract-short" style="display: inline;"> Scientific progress depends on researchers&#39; ability to synthesize the growing body of literature. Can large language models (LMs) assist scientists in this task? We introduce OpenScholar, a specialized retrieval-augmented LM that answers scientific queries by identifying relevant passages from 45 million open-access papers and synthesizing citation-backed responses. 
To evaluate OpenScholar, we dev&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14199v1-abstract-full').style.display = 'inline'; document.getElementById('2411.14199v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.14199v1-abstract-full" style="display: none;"> Scientific progress depends on researchers&#39; ability to synthesize the growing body of literature. Can large language models (LMs) assist scientists in this task? We introduce OpenScholar, a specialized retrieval-augmented LM that answers scientific queries by identifying relevant passages from 45 million open-access papers and synthesizing citation-backed responses. To evaluate OpenScholar, we develop ScholarQABench, the first large-scale multi-domain benchmark for literature search, comprising 2,967 expert-written queries and 208 long-form answers across computer science, physics, neuroscience, and biomedicine. On ScholarQABench, OpenScholar-8B outperforms GPT-4o by 5% and PaperQA2 by 7% in correctness, despite being a smaller, open model. While GPT4o hallucinates citations 78 to 90% of the time, OpenScholar achieves citation accuracy on par with human experts. OpenScholar&#39;s datastore, retriever, and self-feedback inference loop also improves off-the-shelf LMs: for instance, OpenScholar-GPT4o improves GPT-4o&#39;s correctness by 12%. In human evaluations, experts preferred OpenScholar-8B and OpenScholar-GPT4o responses over expert-written ones 51% and 70% of the time, respectively, compared to GPT4o&#39;s 32%. We open-source all of our code, models, datastore, data and a public demo. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14199v1-abstract-full').style.display = 'none'; document.getElementById('2411.14199v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.13779">arXiv:2411.13779</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.13779">pdf</a>, <a href="https://arxiv.org/format/2411.13779">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> NewsInterview: a Dataset and a Playground to Evaluate LLMs&#39; Ground Gap via Informational Interviews </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lu%2C+M">Michael Lu</a>, <a href="/search/cs?searchtype=author&amp;query=Cho%2C+H+J">Hyundong Justin Cho</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+W">Weiyan Shi</a>, <a href="/search/cs?searchtype=author&amp;query=May%2C+J">Jonathan May</a>, <a href="/search/cs?searchtype=author&amp;query=Spangher%2C+A">Alexander Spangher</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.13779v1-abstract-short" style="display: inline;"> Large Language Models (LLMs) have 
demonstrated impressive capabilities in generating coherent text but often struggle with grounding language and strategic dialogue. To address this gap, we focus on journalistic interviews, a domain rich in grounding communication and abundant in data. We curate a dataset of 40,000 two-person informational interviews from NPR and CNN, and reveal that LLMs are sign&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13779v1-abstract-full').style.display = 'inline'; document.getElementById('2411.13779v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.13779v1-abstract-full" style="display: none;"> Large Language Models (LLMs) have demonstrated impressive capabilities in generating coherent text but often struggle with grounding language and strategic dialogue. To address this gap, we focus on journalistic interviews, a domain rich in grounding communication and abundant in data. We curate a dataset of 40,000 two-person informational interviews from NPR and CNN, and reveal that LLMs are significantly less likely than human interviewers to use acknowledgements and to pivot to higher-level questions. Realizing that a fundamental deficit exists in multi-turn planning and strategic thinking, we develop a realistic simulated environment, incorporating source personas and persuasive elements, in order to facilitate the development of agents with longer-horizon rewards. Our experiments show that while source LLMs mimic human behavior in information sharing, interviewer LLMs struggle with recognizing when questions are answered and engaging persuasively, leading to suboptimal information extraction across model size and capability. These findings underscore the need for enhancing LLMs&#39; strategic dialogue capabilities. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13779v1-abstract-full').style.display = 'none'; document.getElementById('2411.13779v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.21518">arXiv:2410.21518</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.21518">pdf</a>, <a href="https://arxiv.org/format/2410.21518">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Predicting sub-population specific viral evolution </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Shi%2C+W">Wenxian Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+M">Menghua Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Barzilay%2C+R">Regina Barzilay</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.21518v1-abstract-short" style="display: inline;"> Forecasting the change in the distribution of viral variants is crucial for therapeutic design and disease surveillance. This task poses significant modeling challenges due to the sharp differences in virus distributions across sub-populations (e.g., countries) and their dynamic interactions. 
Existing machine learning approaches that model the variant distribution as a whole are incapable of makin&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.21518v1-abstract-full').style.display = 'inline'; document.getElementById('2410.21518v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.21518v1-abstract-full" style="display: none;"> Forecasting the change in the distribution of viral variants is crucial for therapeutic design and disease surveillance. This task poses significant modeling challenges due to the sharp differences in virus distributions across sub-populations (e.g., countries) and their dynamic interactions. Existing machine learning approaches that model the variant distribution as a whole are incapable of making location-specific predictions and ignore transmissions that shape the viral landscape. In this paper, we propose a sub-population specific protein evolution model, which predicts the time-resolved distributions of viral proteins in different locations. The algorithm explicitly models the transmission rates between sub-populations and learns their interdependence from data. The change in protein distributions across all sub-populations is defined through a linear ordinary differential equation (ODE) parametrized by transmission rates. Solving this ODE yields the likelihood of a given protein occurring in particular sub-populations. Multi-year evaluation on both SARS-CoV-2 and influenza A/H3N2 demonstrates that our model outperforms baselines in accurately predicting distributions of viral proteins across continents and countries. We also find that the transmission rates learned from data are consistent with the transmission pathways discovered by retrospective phylogenetic analysis. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.21518v1-abstract-full').style.display = 'none'; document.getElementById('2410.21518v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.21236">arXiv:2410.21236</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.21236">pdf</a>, <a href="https://arxiv.org/format/2410.21236">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Flaming-hot Initiation with Regular Execution Sampling for Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chen%2C+W">Weizhe Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Z">Zhicheng Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+G">Guanlin Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+R">Renjie Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+W">Wenlei Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Dun%2C+C">Chen Dun</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+Z">Zheng Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Jin%2C+X">Xing Jin</a>, <a href="/search/cs?searchtype=author&amp;query=Yan%2C+L">Lin Yan</a> </p> <p 
class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.21236v2-abstract-short" style="display: inline;"> Since the release of ChatGPT, large language models (LLMs) have demonstrated remarkable capabilities across various domains. A key challenge in developing these general capabilities is efficiently sourcing diverse, high-quality data. This becomes especially critical in reasoning-related tasks with sandbox checkers, such as math or code, where the goal is to generate correct solutions to specific p&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.21236v2-abstract-full').style.display = 'inline'; document.getElementById('2410.21236v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.21236v2-abstract-full" style="display: none;"> Since the release of ChatGPT, large language models (LLMs) have demonstrated remarkable capabilities across various domains. A key challenge in developing these general capabilities is efficiently sourcing diverse, high-quality data. This becomes especially critical in reasoning-related tasks with sandbox checkers, such as math or code, where the goal is to generate correct solutions to specific problems with higher probability. In this work, we introduce Flaming-hot Initiation with Regular Execution (FIRE) sampling, a simple yet highly effective method to efficiently find good responses. Our empirical findings show that FIRE sampling enhances inference-time generation quality and also benefits training in the alignment stage. Furthermore, we explore how FIRE sampling improves performance by promoting diversity and analyze the impact of employing FIRE at different positions within a response. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.21236v2-abstract-full').style.display = 'none'; document.getElementById('2410.21236v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.19265">arXiv:2410.19265</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.19265">pdf</a>, <a href="https://arxiv.org/format/2410.19265">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> A Survey of Deep Graph Learning under Distribution Shifts: from Graph Out-of-Distribution Generalization to Adaptation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+K">Kexin Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+S">Shuhan Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+S">Song Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+W">Weili Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+C">Chen Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+P">Pan Li</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+S">Sheng Li</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jundong Li</a>, <a href="/search/cs?searchtype=author&amp;query=Ding%2C+K">Kaize Ding</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.19265v1-abstract-short" style="display: inline;"> Distribution shifts on graphs -- the discrepancies in data distribution between training and employing a graph machine learning model -- are ubiquitous and often unavoidable in real-world scenarios. These shifts may severely deteriorate model performance, posing significant challenges for reliable graph machine learning. Consequently, there has been a surge in research on graph machine learning un&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.19265v1-abstract-full').style.display = 'inline'; document.getElementById('2410.19265v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.19265v1-abstract-full" style="display: none;"> Distribution shifts on graphs -- the discrepancies in data distribution between training and employing a graph machine learning model -- are ubiquitous and often unavoidable in real-world scenarios. These shifts may severely deteriorate model performance, posing significant challenges for reliable graph machine learning. Consequently, there has been a surge in research on graph machine learning under distribution shifts, aiming to train models to achieve satisfactory performance on out-of-distribution (OOD) test data. In our survey, we provide an up-to-date and forward-looking review of deep graph learning under distribution shifts. Specifically, we cover three primary scenarios: graph OOD generalization, training-time graph OOD adaptation, and test-time graph OOD adaptation. We begin by formally formulating the problems and discussing various types of distribution shifts that can affect graph learning, such as covariate shifts and concept shifts. 
To provide a better understanding of the literature, we systematically categorize the existing models based on our proposed taxonomy and investigate the adopted techniques behind. We also summarize commonly used datasets in this research area to facilitate further investigation. Finally, we point out promising research directions and the corresponding challenges to encourage further study in this vital domain. Additionally, we provide a continuously updated reading list at https://github.com/kaize0409/Awesome-Graph-OOD. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.19265v1-abstract-full').style.display = 'none'; document.getElementById('2410.19265v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">18 pages, 2 figures. 
arXiv admin note: text overlap with arXiv:2402.11153</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.17621">arXiv:2410.17621</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.17621">pdf</a>, <a href="https://arxiv.org/format/2410.17621">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Process Supervision-Guided Policy Optimization for Code Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Dai%2C+N">Ning Dai</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+Z">Zheng Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+R">Renjie Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Wei%2C+Z">Ziyun Wei</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+W">Wenlei Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Jin%2C+X">Xing Jin</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+G">Guanlin Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Dun%2C+C">Chen Dun</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+L">Liang Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Yan%2C+L">Lin Yan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.17621v2-abstract-short" style="display: inline;"> Reinforcement learning (RL) with unit test feedback has enhanced large language models&#39; (LLMs) code generation, but relies on sparse rewards provided only after complete code evaluation, limiting learning efficiency and incremental improvements. 
When generated code fails all unit tests, no learning signal is received, hindering progress on complex tasks. To address this, we propose a Process Rewar&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.17621v2-abstract-full').style.display = 'inline'; document.getElementById('2410.17621v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.17621v2-abstract-full" style="display: none;"> Reinforcement learning (RL) with unit test feedback has enhanced large language models&#39; (LLMs) code generation, but relies on sparse rewards provided only after complete code evaluation, limiting learning efficiency and incremental improvements. When generated code fails all unit tests, no learning signal is received, hindering progress on complex tasks. To address this, we propose a Process Reward Model (PRM) that delivers dense, line-level feedback on code correctness during generation, mimicking human code refinement and providing immediate guidance. We explore various strategies for training PRMs and integrating them into the RL framework, finding that using PRMs both as dense rewards and for value function initialization significantly boosts performance. Our experimental results also highlight the effectiveness of PRMs in enhancing RL-driven code generation, especially for long-horizon scenarios. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.17621v2-abstract-full').style.display = 'none'; document.getElementById('2410.17621v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">15 pages, 8 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> I.2.7; </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.13085">arXiv:2410.13085</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.13085">pdf</a>, <a href="https://arxiv.org/format/2410.13085">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> MMed-RAG: Versatile Multimodal RAG System for Medical Vision Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Xia%2C+P">Peng Xia</a>, <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+K">Kangyu Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Haoran Li</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+T">Tianze Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+W">Weijia Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+S">Sheng Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+L">Linjun Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Zou%2C+J">James Zou</a>, <a href="/search/cs?searchtype=author&amp;query=Yao%2C+H">Huaxiu Yao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" 
id="2410.13085v1-abstract-short" style="display: inline;"> Artificial Intelligence (AI) has demonstrated significant potential in healthcare, particularly in disease diagnosis and treatment planning. Recent progress in Medical Large Vision-Language Models (Med-LVLMs) has opened up new possibilities for interactive diagnostic tools. However, these models often suffer from factual hallucination, which can lead to incorrect diagnoses. Fine-tuning and retriev&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.13085v1-abstract-full').style.display = 'inline'; document.getElementById('2410.13085v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.13085v1-abstract-full" style="display: none;"> Artificial Intelligence (AI) has demonstrated significant potential in healthcare, particularly in disease diagnosis and treatment planning. Recent progress in Medical Large Vision-Language Models (Med-LVLMs) has opened up new possibilities for interactive diagnostic tools. However, these models often suffer from factual hallucination, which can lead to incorrect diagnoses. Fine-tuning and retrieval-augmented generation (RAG) have emerged as methods to address these issues. However, the amount of high-quality data and distribution shifts between training data and deployment data limit the application of fine-tuning methods. Although RAG is lightweight and effective, existing RAG-based approaches are not sufficiently general to different medical domains and can potentially cause misalignment issues, both between modalities and between the model and the ground truth. In this paper, we propose a versatile multimodal RAG system, MMed-RAG, designed to enhance the factuality of Med-LVLMs. Our approach introduces a domain-aware retrieval mechanism, an adaptive retrieved contexts selection method, and a provable RAG-based preference fine-tuning strategy. 
These innovations make the RAG process sufficiently general and reliable, significantly improving alignment when introducing retrieved contexts. Experimental results across five medical datasets (involving radiology, ophthalmology, pathology) on medical VQA and report generation demonstrate that MMed-RAG can achieve an average improvement of 43.8% in the factual accuracy of Med-LVLMs. Our data and code are available in https://github.com/richard-peng-xia/MMed-RAG. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.13085v1-abstract-full').style.display = 'none'; document.getElementById('2410.13085v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.12799">arXiv:2410.12799</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.12799">pdf</a>, <a href="https://arxiv.org/format/2410.12799">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> </div> </div> <p class="title is-5 mathjax"> Ads Supply Personalization via Doubly Robust Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Shi%2C+W">Wei Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+C">Chen Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+Q">Qi Xu</a>, <a 
href="/search/cs?searchtype=author&amp;query=Chen%2C+S">Sanjian Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+J">Jizhe Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+Q">Qinqin Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Hua%2C+Z">Zhigang Hua</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+S">Shuang Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.12799v1-abstract-short" style="display: inline;"> Ads supply personalization aims to balance the revenue and user engagement, two long-term objectives in social media ads, by tailoring the ad quantity and density. In the industry-scale system, the challenge for ads supply lies in modeling the counterfactual effects of a conservative supply treatment (e.g., a small density change) over an extended duration. In this paper, we present a streamlined&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.12799v1-abstract-full').style.display = 'inline'; document.getElementById('2410.12799v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.12799v1-abstract-full" style="display: none;"> Ads supply personalization aims to balance the revenue and user engagement, two long-term objectives in social media ads, by tailoring the ad quantity and density. In the industry-scale system, the challenge for ads supply lies in modeling the counterfactual effects of a conservative supply treatment (e.g., a small density change) over an extended duration. In this paper, we present a streamlined framework for personalized ad supply. This framework optimally utilizes information from data collection policies through the doubly robust learning. 
Consequently, it significantly improves the accuracy of long-term treatment effect estimates. Additionally, its low-complexity design not only results in computational cost savings compared to existing methods, but also makes it scalable for billion-scale applications. Through both offline experiments and online production tests, the framework consistently demonstrated significant improvements in top-line business metrics over months. The framework has been fully deployed to live traffic in one of the world&#39;s largest social media platforms. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.12799v1-abstract-full').style.display = 'none'; document.getElementById('2410.12799v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by CIKM&#39;24</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.11538">arXiv:2410.11538</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.11538">pdf</a>, <a href="https://arxiv.org/format/2410.11538">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> MCTBench: Multimodal Cognition towards Text-Rich Visual Scenes Benchmark </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Shan%2C+B">Bin Shan</a>, <a href="/search/cs?searchtype=author&amp;query=Fei%2C+X">Xiang Fei</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+W">Wei Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+A">An-Lan Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Tang%2C+G">Guozhi Tang</a>, <a href="/search/cs?searchtype=author&amp;query=Liao%2C+L">Lei Liao</a>, <a href="/search/cs?searchtype=author&amp;query=Tang%2C+J">Jingqun Tang</a>, <a href="/search/cs?searchtype=author&amp;query=Bai%2C+X">Xiang Bai</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+C">Can Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.11538v1-abstract-short" style="display: inline;"> The comprehension of text-rich visual scenes has become a focal point for evaluating Multi-modal Large Language Models (MLLMs) due to their widespread applications. 
Current benchmarks tailored to the scenario emphasize perceptual capabilities, while overlooking the assessment of cognitive abilities. To address this limitation, we introduce a Multimodal benchmark towards Text-rich visual scenes, to&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.11538v1-abstract-full').style.display = 'inline'; document.getElementById('2410.11538v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.11538v1-abstract-full" style="display: none;"> The comprehension of text-rich visual scenes has become a focal point for evaluating Multi-modal Large Language Models (MLLMs) due to their widespread applications. Current benchmarks tailored to the scenario emphasize perceptual capabilities, while overlooking the assessment of cognitive abilities. To address this limitation, we introduce a Multimodal benchmark towards Text-rich visual scenes, to evaluate the Cognitive capabilities of MLLMs through visual reasoning and content-creation tasks (MCTBench). To mitigate potential evaluation bias from the varying distributions of datasets, MCTBench incorporates several perception tasks (e.g., scene text recognition) to ensure a consistent comparison of both the cognitive and perceptual capabilities of MLLMs. To improve the efficiency and fairness of content-creation evaluation, we conduct an automatic evaluation pipeline. Evaluations of various MLLMs on MCTBench reveal that, despite their impressive perceptual capabilities, their cognition abilities require enhancement. We hope MCTBench will offer the community an efficient resource to explore and enhance cognitive capabilities towards text-rich visual scenes. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.11538v1-abstract-full').style.display = 'none'; document.getElementById('2410.11538v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 5 figures, project page: https://github.com/xfey/MCTBench?tab=readme-ov-file</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.08196">arXiv:2410.08196</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.08196">pdf</a>, <a href="https://arxiv.org/format/2410.08196">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> MathCoder2: Better Math Reasoning from Continued Pretraining on Model-translated Mathematical Code </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lu%2C+Z">Zimu Lu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+A">Aojun Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+K">Ke Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Ren%2C+H">Houxing Ren</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+W">Weikang Shi</a>, <a 
href="/search/cs?searchtype=author&amp;query=Pan%2C+J">Junting Pan</a>, <a href="/search/cs?searchtype=author&amp;query=Zhan%2C+M">Mingjie Zhan</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Hongsheng Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.08196v1-abstract-short" style="display: inline;"> Code has been shown to be effective in enhancing the mathematical reasoning abilities of large language models due to its precision and accuracy. Previous works involving continued mathematical pretraining often include code that utilizes math-related packages, which are primarily designed for fields such as engineering, machine learning, signal processing, or module testing, rather than being dir&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.08196v1-abstract-full').style.display = 'inline'; document.getElementById('2410.08196v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.08196v1-abstract-full" style="display: none;"> Code has been shown to be effective in enhancing the mathematical reasoning abilities of large language models due to its precision and accuracy. Previous works involving continued mathematical pretraining often include code that utilizes math-related packages, which are primarily designed for fields such as engineering, machine learning, signal processing, or module testing, rather than being directly focused on mathematical reasoning. In this paper, we introduce a novel method for generating mathematical code accompanied with corresponding reasoning steps for continued pretraining. 
Our approach begins with the construction of a high-quality mathematical continued pretraining dataset by incorporating math-related web data, code using mathematical packages, math textbooks, and synthetic data. Next, we construct reasoning steps by extracting LaTeX expressions, the conditions needed for the expressions, and the results of the expressions from the previously collected dataset. Based on this extracted information, we generate corresponding code to accurately capture the mathematical reasoning process. Appending the generated code to each reasoning step results in data consisting of paired natural language reasoning steps and their corresponding code. Combining this data with the original dataset results in a 19.2B-token high-performing mathematical pretraining corpus, which we name MathCode-Pile. Training several popular base models with this corpus significantly improves their mathematical abilities, leading to the creation of the MathCoder2 family of models. All of our data processing and training code is open-sourced, ensuring full transparency and easy reproducibility of the entire data collection and training pipeline. The code is released at https://github.com/mathllm/MathCoder2 . <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.08196v1-abstract-full').style.display = 'none'; document.getElementById('2410.08196v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">https://github.com/mathllm/MathCoder2</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.06519">arXiv:2410.06519</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.06519">pdf</a>, <a href="https://arxiv.org/format/2410.06519">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> SEGMENT+: Long Text Processing with Short-Context Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Shi%2C+W">Wei Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+S">Shuang Li</a>, <a href="/search/cs?searchtype=author&amp;query=Yu%2C+K">Kerun Yu</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+J">Jinglei Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Liang%2C+Z">Zujie Liang</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+X">Xinhui Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Qian%2C+Y">Yuxi Qian</a>, <a href="/search/cs?searchtype=author&amp;query=Wei%2C+F">Feng Wei</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+B">Bo Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Liang%2C+J">Jiaqing Liang</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+J">Jiangjie Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Xiao%2C+Y">Yanghua Xiao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.06519v1-abstract-short" style="display: inline;"> There is a growing interest in expanding the input 
capacity of language models (LMs) across various domains. However, simply increasing the context window does not guarantee robust performance across diverse long-input processing tasks, such as understanding extensive documents and extracting detailed information from lengthy and noisy data. In response, we introduce SEGMENT+, a general framework&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.06519v1-abstract-full').style.display = 'inline'; document.getElementById('2410.06519v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.06519v1-abstract-full" style="display: none;"> There is a growing interest in expanding the input capacity of language models (LMs) across various domains. However, simply increasing the context window does not guarantee robust performance across diverse long-input processing tasks, such as understanding extensive documents and extracting detailed information from lengthy and noisy data. In response, we introduce SEGMENT+, a general framework that enables LMs to handle extended inputs within limited context windows efficiently. SEGMENT+ utilizes structured notes and a filtering module to manage information flow, resulting in a system that is both controllable and interpretable. Our extensive experiments across various model sizes, focusing on long-document question-answering and Needle-in-a-Haystack tasks, demonstrate the effectiveness of SEGMENT+ in improving performance. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.06519v1-abstract-full').style.display = 'none'; document.getElementById('2410.06519v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">EMNLP 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.02678">arXiv:2410.02678</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.02678">pdf</a>, <a href="https://arxiv.org/format/2410.02678">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Distilling an End-to-End Voice Assistant Without Instruction Training Data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Held%2C+W">William Held</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+E">Ella Li</a>, <a href="/search/cs?searchtype=author&amp;query=Ryan%2C+M">Michael Ryan</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+W">Weiyan Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Y">Yanzhe Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+D">Diyi Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" 
id="2410.02678v1-abstract-short" style="display: inline;"> Voice assistants, such as Siri and Google Assistant, typically model audio and text separately, resulting in lost speech information and increased complexity. Recent efforts to address this with end-to-end Speech Large Language Models (LLMs) trained with supervised finetuning (SFT) have led to models ``forgetting&#34; capabilities from text-only LLMs. Our work proposes an alternative paradigm for tr&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02678v1-abstract-full').style.display = 'inline'; document.getElementById('2410.02678v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.02678v1-abstract-full" style="display: none;"> Voice assistants, such as Siri and Google Assistant, typically model audio and text separately, resulting in lost speech information and increased complexity. Recent efforts to address this with end-to-end Speech Large Language Models (LLMs) trained with supervised finetuning (SFT) have led to models ``forgetting&#34; capabilities from text-only LLMs. Our work proposes an alternative paradigm for training Speech LLMs without instruction data, using the response of a text-only LLM to transcripts as self-supervision. Importantly, this process can be performed without annotated responses. We show that our Distilled Voice Assistant (DiVA) generalizes to Spoken Question Answering, Classification, and Translation. Furthermore, we show that DiVA better meets user preferences, achieving a 72\% win rate compared with state-of-the-art models like Qwen 2 Audio, despite using $&gt;$100x less training compute. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02678v1-abstract-full').style.display = 'none'; document.getElementById('2410.02678v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.19401">arXiv:2409.19401</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.19401">pdf</a>, <a href="https://arxiv.org/format/2409.19401">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Crafting Personalized Agents through Retrieval-Augmented Generation on Editable Memory Graphs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Z">Zheng Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Z">Zhongyang Li</a>, <a href="/search/cs?searchtype=author&amp;query=Jiang%2C+Z">Zeren Jiang</a>, <a href="/search/cs?searchtype=author&amp;query=Tu%2C+D">Dandan Tu</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+W">Wei Shi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.19401v1-abstract-short" style="display: inline;"> In the age of mobile internet, user data, often referred to as memories, is continuously generated on personal devices. 
Effectively managing and utilizing this data to deliver services to users is a compelling research topic. In this paper, we introduce a novel task of crafting personalized agents powered by large language models (LLMs), which utilize a user&#39;s smartphone memories to enhance downst&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.19401v1-abstract-full').style.display = 'inline'; document.getElementById('2409.19401v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.19401v1-abstract-full" style="display: none;"> In the age of mobile internet, user data, often referred to as memories, is continuously generated on personal devices. Effectively managing and utilizing this data to deliver services to users is a compelling research topic. In this paper, we introduce a novel task of crafting personalized agents powered by large language models (LLMs), which utilize a user&#39;s smartphone memories to enhance downstream applications with advanced LLM capabilities. To achieve this goal, we introduce EMG-RAG, a solution that combines Retrieval-Augmented Generation (RAG) techniques with an Editable Memory Graph (EMG). This approach is further optimized using Reinforcement Learning to address three distinct challenges: data collection, editability, and selectability. Extensive experiments on a real-world dataset validate the effectiveness of EMG-RAG, achieving an improvement of approximately 10% over the best existing approach. Additionally, the personalized agents have been transferred into a real smartphone AI assistant, which leads to enhanced usability. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.19401v1-abstract-full').style.display = 'none'; document.getElementById('2409.19401v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper has been accepted by EMNLP 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.18885">arXiv:2409.18885</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.18885">pdf</a>, <a href="https://arxiv.org/format/2409.18885">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> HR-Extreme: A High-Resolution Dataset for Extreme Weather Forecasting </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Ran%2C+N">Nian Ran</a>, <a href="/search/cs?searchtype=author&amp;query=Xiao%2C+P">Peng Xiao</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Yue Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+W">Wesley Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Lin%2C+J">Jianxin Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Meng%2C+Q">Qi Meng</a>, <a href="/search/cs?searchtype=author&amp;query=Allmendinger%2C+R">Richard Allmendinger</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" 
id="2409.18885v2-abstract-short" style="display: inline;"> The application of large deep learning models in weather forecasting has led to significant advancements in the field, including higher-resolution forecasting and extended prediction periods exemplified by models such as Pangu and Fuxi. Despite these successes, previous research has largely been characterized by the neglect of extreme weather events, and the availability of datasets specifically c&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.18885v2-abstract-full').style.display = 'inline'; document.getElementById('2409.18885v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.18885v2-abstract-full" style="display: none;"> The application of large deep learning models in weather forecasting has led to significant advancements in the field, including higher-resolution forecasting and extended prediction periods exemplified by models such as Pangu and Fuxi. Despite these successes, previous research has largely been characterized by the neglect of extreme weather events, and the availability of datasets specifically curated for such events remains limited. Given the critical importance of accurately forecasting extreme weather, this study introduces a comprehensive dataset that incorporates high-resolution extreme weather cases derived from the High-Resolution Rapid Refresh (HRRR) data, a 3-km real-time dataset provided by NOAA. We also evaluate the current state-of-the-art deep learning models and Numerical Weather Prediction (NWP) systems on HR-Extreme, and provide a improved baseline deep learning model called HR-Heim which has superior performance on both general loss and HR-Extreme compared to others. 
Our results reveal that the errors of extreme weather cases are significantly larger than overall forecast error, highlighting them as a crucial source of loss in weather prediction. These findings underscore the necessity for future research to focus on improving the accuracy of extreme weather forecasts to enhance their practical utility. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.18885v2-abstract-full').style.display = 'none'; document.getElementById('2409.18885v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at the International Conference on Learning Representations (ICLR) 2025. 
Supplementary matrials link: https://openreview.net/forum?id=5AtlfHYCPa</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.15316">arXiv:2409.15316</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.15316">pdf</a>, <a href="https://arxiv.org/format/2409.15316">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Towards Social AI: A Survey on Understanding Social Interactions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lee%2C+S">Sangmin Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+M">Minzhi Li</a>, <a href="/search/cs?searchtype=author&amp;query=Lai%2C+B">Bolin Lai</a>, <a href="/search/cs?searchtype=author&amp;query=Jia%2C+W">Wenqi Jia</a>, <a href="/search/cs?searchtype=author&amp;query=Ryan%2C+F">Fiona Ryan</a>, <a href="/search/cs?searchtype=author&amp;query=Cao%2C+X">Xu Cao</a>, <a href="/search/cs?searchtype=author&amp;query=Kara%2C+O">Ozgur Kara</a>, <a href="/search/cs?searchtype=author&amp;query=Boote%2C+B">Bikram Boote</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+W">Weiyan Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+D">Diyi Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Rehg%2C+J+M">James M. Rehg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.15316v2-abstract-short" style="display: inline;"> Social interactions form the foundation of human societies. Artificial intelligence has made significant progress in certain areas, but enabling machines to seamlessly understand social interactions remains an open challenge. 
It is important to address this gap by endowing machines with social capabilities. We identify three key capabilities needed for effective social understanding: 1) understand&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.15316v2-abstract-full').style.display = 'inline'; document.getElementById('2409.15316v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.15316v2-abstract-full" style="display: none;"> Social interactions form the foundation of human societies. Artificial intelligence has made significant progress in certain areas, but enabling machines to seamlessly understand social interactions remains an open challenge. It is important to address this gap by endowing machines with social capabilities. We identify three key capabilities needed for effective social understanding: 1) understanding multimodal social cues, 2) understanding multi-party dynamics, and 3) understanding beliefs. Building upon these foundations, we classify and review existing machine learning works on social understanding from the perspectives of verbal, non-verbal, and multimodal social cues. The verbal branch focuses on understanding linguistic signals such as speaker intent, dialogue sentiment, and commonsense reasoning. The non-verbal branch addresses techniques for perceiving social meaning from visual behaviors such as body gestures, gaze patterns, and facial expressions. The multimodal branch covers approaches that integrate verbal and non-verbal multimodal cues to holistically interpret social interactions such as recognizing emotions, conversational dynamics, and social situations. By reviewing the scope and limitations of current approaches and benchmarks, we aim to clarify the development trajectory and illuminate the path towards more comprehensive intelligence for social understanding. 
We hope this survey will spur further research interest and insights into this area. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.15316v2-abstract-full').style.display = 'none'; document.getElementById('2409.15316v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 5 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.13994">arXiv:2409.13994</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.13994">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Contrastive Learning for Knowledge-Based Question Generation in Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Z">Zhenhong Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+J">Jiajing Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+W">Weiyan Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Yi%2C+L">Lingjie Yi</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+C">Chihang Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Yu%2C+Q">Qian Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.13994v2-abstract-short" 
style="display: inline;"> With the rapid development of artificial intelligence technology, especially the increasingly widespread application of question-and-answer systems, high-quality question generation has become a key component in supporting the development of these systems. This article focuses on knowledge-based question generation technology, which aims to enable computers to simulate the human questioning proces&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.13994v2-abstract-full').style.display = 'inline'; document.getElementById('2409.13994v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.13994v2-abstract-full" style="display: none;"> With the rapid development of artificial intelligence technology, especially the increasingly widespread application of question-and-answer systems, high-quality question generation has become a key component in supporting the development of these systems. This article focuses on knowledge-based question generation technology, which aims to enable computers to simulate the human questioning process based on understanding specific texts or knowledge bases. In light of the issues of hallucination and knowledge gaps present in large-scale language models when applied to knowledge-intensive tasks, this paper proposes an enhanced question generation method that incorporates contrastive learning. This method utilizes multiple models to jointly mine domain knowledge and uses contrastive learning to guide the model in reducing noise and hallucinations in generation. Experimental results show that by designing prompts containing contrasting examples, the model&#39;s performance in question generation improves considerably, particularly when contrasting instructions and examples are used simultaneously, leading to the highest quality of generated questions and improved accuracy. 
These results demonstrate that the method proposed in this study, which combines contrasting context and chain-of-thought prompts, can effectively improve both the quality and the practicality of question generation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.13994v2-abstract-full').style.display = 'none'; document.getElementById('2409.13994v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, 2 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.11744">arXiv:2409.11744</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.11744">pdf</a>, <a href="https://arxiv.org/ps/2409.11744">ps</a>, <a href="https://arxiv.org/format/2409.11744">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Exploring Gaze Pattern Differences Between ASD and TD Children Using Internal Cluster Validity Indices </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Shi%2C+W">Weiyan Shi</a>, <a 
href="/search/cs?searchtype=author&amp;query=Zhang%2C+H">Haihong Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Ding%2C+R">Ruiqing Ding</a>, <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+Y">YongWei Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+W">Wei Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Choo%2C+K+T+W">Kenny Tsu Wei Choo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.11744v2-abstract-short" style="display: inline;"> Autism Spectrum Disorder (ASD) affects children&#39;s social and communication abilities, with eye-tracking widely used to identify atypical gaze patterns. While unsupervised clustering can automate the creation of areas of interest for gaze feature extraction, the use of internal cluster validity indices, like Silhouette Coefficient, to distinguish gaze pattern differences between ASD and typically d&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.11744v2-abstract-full').style.display = 'inline'; document.getElementById('2409.11744v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.11744v2-abstract-full" style="display: none;"> Autism Spectrum Disorder (ASD) affects children&#39;s social and communication abilities, with eye-tracking widely used to identify atypical gaze patterns. While unsupervised clustering can automate the creation of areas of interest for gaze feature extraction, the use of internal cluster validity indices, like Silhouette Coefficient, to distinguish gaze pattern differences between ASD and typically developing (TD) children remains underexplored. We explore whether internal cluster validity indices can distinguish ASD from TD children. 
Specifically, we apply seven clustering algorithms to gaze points and extract 63 internal cluster validity indices to reveal correlations with ASD diagnosis. Using these indices, we train predictive models for ASD diagnosis. Experiments on three datasets demonstrate high predictive accuracy (81\% AUC), validating the effectiveness of these indices. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.11744v2-abstract-full').style.display = 'none'; document.getElementById('2409.11744v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.06334">arXiv:2409.06334</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.06334">pdf</a>, <a href="https://arxiv.org/format/2409.06334">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Multi-Weather Image Restoration via Histogram-Based Transformer Feature Enhancement </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wen%2C+Y">Yang Wen</a>, <a href="/search/cs?searchtype=author&amp;query=Lai%2C+A">Anyu Lai</a>, <a href="/search/cs?searchtype=author&amp;query=Qian%2C+B">Bo Qian</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+H">Hao Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+W">Wuzhen Shi</a>, <a 
href="/search/cs?searchtype=author&amp;query=Cao%2C+W">Wenming Cao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.06334v1-abstract-short" style="display: inline;"> Currently, the mainstream restoration tasks under adverse weather conditions have predominantly focused on single-weather scenarios. However, in reality, multiple weather conditions always coexist and their degree of mixing is usually unknown. Under such complex and diverse weather conditions, single-weather restoration models struggle to meet practical demands. This is particularly critical in fi&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.06334v1-abstract-full').style.display = 'inline'; document.getElementById('2409.06334v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.06334v1-abstract-full" style="display: none;"> Currently, the mainstream restoration tasks under adverse weather conditions have predominantly focused on single-weather scenarios. However, in reality, multiple weather conditions always coexist and their degree of mixing is usually unknown. Under such complex and diverse weather conditions, single-weather restoration models struggle to meet practical demands. This is particularly critical in fields such as autonomous driving, where there is an urgent need for a model capable of effectively handling mixed weather conditions and enhancing image quality in an automated manner. In this paper, we propose a Task Sequence Generator module that, in conjunction with the Task Intra-patch Block, effectively extracts task-specific features embedded in degraded images. The Task Intra-patch Block introduces an external learnable sequence that aids the network in capturing task-specific information. 
Additionally, we employ a histogram-based transformer module as the backbone of our network, enabling the capture of both global and local dynamic range features. Our proposed model achieves state-of-the-art performance on public datasets. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.06334v1-abstract-full').style.display = 'none'; document.getElementById('2409.06334v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">arXiv admin note: text overlap with arXiv:2409.03249</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Shi%2C+W&amp;start=50" class="pagination-next">Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Shi%2C+W&amp;start=0" class="pagination-link is-current" aria-label="Go to page 1" aria-current="page">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Shi%2C+W&amp;start=50" class="pagination-link" aria-label="Page 2">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Shi%2C+W&amp;start=100" class="pagination-link" aria-label="Page 3">3 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Shi%2C+W&amp;start=150" class="pagination-link" aria-label="Page 4">4 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Shi%2C+W&amp;start=200" class="pagination-link" aria-label="Page 5">5 </a> </li> 
<li><span class="pagination-ellipsis">&hellip;</span></li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div 
class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 
25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

Pages: 1 2 3 4 5 6 7 8 9 10