
Search | arXiv e-print repository

<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;50 of 282 results for author: <span class="mathjax">Yin, D</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&amp;query=Yin%2C+D">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Yin, D"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Yin%2C+D&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Yin, D"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Yin%2C+D&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Yin%2C+D&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Yin%2C+D&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Yin%2C+D&amp;start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Yin%2C+D&amp;start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Yin%2C+D&amp;start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Yin%2C+D&amp;start=250" class="pagination-link " aria-label="Page 6" aria-current="page">6 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2503.17928">arXiv:2503.17928</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2503.17928">pdf</a>, <a href="https://arxiv.org/format/2503.17928">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Debiasing Multimodal Large Language Models via Noise-Aware Preference Optimization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Z">Zefeng Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Tang%2C+H">Hengzhu Tang</a>, <a href="/search/cs?searchtype=author&amp;query=Sheng%2C+J">Jiawei Sheng</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Z">Zhenyu Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Ren%2C+Y">Yiming Ren</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Z">Zhenyang Li</a>, <a href="/search/cs?searchtype=author&amp;query=Yin%2C+D">Dawei Yin</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+D">Duohe Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+T">Tingwen Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2503.17928v1-abstract-short" style="display: inline;"> 
Abstract: Multimodal Large Language Models excel in various tasks, yet often struggle with modality bias, where the model tends to rely heavily on a single modality and overlook critical information in other modalities, which leads to incorrect focus and irrelevant responses. In this paper, we propose using the paradigm of preference optimization to solve the modality bias problem, including RLAIFVBias, a debiased preference optimization dataset, and a Noise-Aware Preference Optimization algorithm. Specifically, we first construct the dataset by introducing perturbations to reduce the informational content of certain modalities, compelling the model to rely on a specific modality when generating negative responses. To address the inevitable noise in automatically constructed data, we combine the noise-robust Mean Absolute Error with the Binary Cross-Entropy in Direct Preference Optimization via a negative Box-Cox transformation, and dynamically adjust the algorithm's noise robustness based on the evaluated noise levels in the data. Extensive experiments validate our approach, demonstrating not only its effectiveness in mitigating modality bias but also its significant role in minimizing hallucinations.
Submitted: 23 March, 2025; originally announced March 2025.
Comments: CVPR 2025
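The loss construction described in this abstract is concrete enough to sketch. Below is a minimal illustration (not the authors' code) of how a negative Box-Cox transform can interpolate between the BCE-style DPO loss and the noise-robust MAE; the function name and the fixed q are assumptions, and the paper additionally adjusts the noise robustness dynamically from estimated noise levels.

```python
import torch

def noise_aware_dpo_loss(policy_chosen_logps, policy_rejected_logps,
                         ref_chosen_logps, ref_rejected_logps,
                         beta=0.1, q=0.5):
    # Bradley-Terry probability that the chosen response is preferred.
    margin = (policy_chosen_logps - policy_rejected_logps) \
           - (ref_chosen_logps - ref_rejected_logps)
    p = torch.sigmoid(beta * margin)
    # Negative Box-Cox transform of p: recovers the usual -log(p) (BCE)
    # as q -> 0 and the noise-robust 1 - p (MAE) at q = 1. The paper
    # tunes the robustness dynamically; a fixed q here is an assumption.
    return ((1.0 - p.pow(q)) / q).mean()
```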
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">CVPR 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2503.10615">arXiv:2503.10615</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2503.10615">pdf</a>, <a href="https://arxiv.org/format/2503.10615">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> R1-Onevision: Advancing Generalized Multimodal Reasoning through Cross-Modal Formalization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yang%2C+Y">Yi Yang</a>, <a href="/search/cs?searchtype=author&amp;query=He%2C+X">Xiaoxuan He</a>, <a href="/search/cs?searchtype=author&amp;query=Pan%2C+H">Hongkun Pan</a>, <a href="/search/cs?searchtype=author&amp;query=Jiang%2C+X">Xiyan Jiang</a>, <a href="/search/cs?searchtype=author&amp;query=Deng%2C+Y">Yan Deng</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+X">Xingtao Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Lu%2C+H">Haoyu Lu</a>, <a href="/search/cs?searchtype=author&amp;query=Yin%2C+D">Dacheng Yin</a>, <a href="/search/cs?searchtype=author&amp;query=Rao%2C+F">Fengyun Rao</a>, <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+M">Minfeng Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+B">Bo Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+W">Wei Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2503.10615v2-abstract-short" style="display: inline;"> Large Language Models have demonstrated remarkable reasoning capability in complex textual tasks. However, multimodal reasoning, which requires integrating visual and textual information, remains a significant challenge. Existing visual-language models often struggle to effectively analyze and reason visual content, resulting in suboptimal performance on complex reasoning tasks. Moreover, the abse&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.10615v2-abstract-full').style.display = 'inline'; document.getElementById('2503.10615v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2503.10615v2-abstract-full" style="display: none;"> Large Language Models have demonstrated remarkable reasoning capability in complex textual tasks. However, multimodal reasoning, which requires integrating visual and textual information, remains a significant challenge. Existing visual-language models often struggle to effectively analyze and reason visual content, resulting in suboptimal performance on complex reasoning tasks. Moreover, the absence of comprehensive benchmarks hinders the accurate assessment of multimodal reasoning capabilities. In this paper, we introduce R1-Onevision, a multimodal reasoning model designed to bridge the gap between visual perception and deep reasoning. To achieve this, we propose a cross-modal reasoning pipeline that transforms images into formal textural representations, enabling precise language-based reasoning. 
Abstract: Large Language Models have demonstrated remarkable reasoning capability in complex textual tasks. However, multimodal reasoning, which requires integrating visual and textual information, remains a significant challenge. Existing visual-language models often struggle to effectively analyze and reason about visual content, resulting in suboptimal performance on complex reasoning tasks. Moreover, the absence of comprehensive benchmarks hinders the accurate assessment of multimodal reasoning capabilities. In this paper, we introduce R1-Onevision, a multimodal reasoning model designed to bridge the gap between visual perception and deep reasoning. To achieve this, we propose a cross-modal reasoning pipeline that transforms images into formal textual representations, enabling precise language-based reasoning. Leveraging this pipeline, we construct the R1-Onevision dataset, which provides detailed, step-by-step multimodal reasoning annotations across diverse domains. We further develop the R1-Onevision model through supervised fine-tuning and reinforcement learning to cultivate advanced reasoning and robust generalization abilities. To comprehensively evaluate multimodal reasoning performance across different grades, we introduce R1-Onevision-Bench, a benchmark aligned with human educational stages, covering exams from junior high school to university and beyond. Experimental results show that R1-Onevision achieves state-of-the-art performance, outperforming models such as GPT-4o and Qwen2.5-VL on multiple challenging multimodal reasoning benchmarks.
Submitted: 18 March, 2025; v1 submitted 13 March, 2025; originally announced March 2025.
Comments: Code and Model: https://github.com/Fancy-MLLM/R1-onevision

3. arXiv:2503.09382 [pdf, other] (cs.IR, cs.AI)
Title: Towards Next-Generation Recommender Systems: A Benchmark for Personalized Recommendation Assistant with LLMs
Authors: Jiani Huang, Shijie Wang, Liang-bo Ning, Wenqi Fan, Shuaiqiang Wang, Dawei Yin, Qing Li
Abstract: Recommender systems (RecSys) are widely used across various modern digital platforms and have garnered significant attention. Traditional recommender systems usually focus only on fixed and simple recommendation scenarios, making it difficult to generalize to new and unseen recommendation tasks in an interactive paradigm. Recently, the advancement of large language models (LLMs) has revolutionized the foundational architecture of RecSys, driving their evolution into more intelligent and interactive personalized recommendation assistants. However, most existing studies rely on fixed task-specific prompt templates to generate recommendations and evaluate the performance of personalized assistants, which limits comprehensive assessment of their capabilities. This is because commonly used datasets lack high-quality textual user queries that reflect real-world recommendation scenarios, making them unsuitable for evaluating LLM-based personalized recommendation assistants. To address this gap, we introduce RecBench+, a new dataset benchmark designed to assess LLMs' ability to handle intricate user recommendation needs in the era of LLMs. RecBench+ encompasses a diverse set of queries that span both hard conditions and soft preferences, with varying difficulty levels. We evaluated commonly used LLMs on RecBench+ and uncovered the following findings: 1) LLMs demonstrate preliminary abilities to act as recommendation assistants; 2) LLMs are better at handling queries with explicitly stated conditions, while facing challenges with queries that require reasoning or contain misleading information. Our dataset has been released at https://github.com/jiani-huang/RecBench.git.
Submitted: 12 March, 2025; originally announced March 2025.
4. arXiv:2503.04772 [pdf, other] (cs.LO, cs.AI)
Title: Generating Millions Of Lean Theorems With Proofs By Exploring State Transition Graphs
Authors: David Yin, Jing Gao
Abstract: Large Language Models (LLMs) have demonstrated significant potential in generating mathematical proofs. However, a persistent challenge is that LLMs occasionally make mistakes, while even a minor mistake can invalidate an entire proof. Proof assistants like Lean offer a great remedy. They are designed for verifying each step of a proof in a formal language, and in recent years researchers have created AI models to generate proofs in their languages. However, the scarcity of large-scale datasets of Lean proofs restricts the performance of such Automated Theorem Proving (ATP) models. We developed LeanNavigator, a novel method for generating a large-scale dataset of Lean theorems and proofs by finding new ways to prove existing Lean theorems. By leveraging an interactive Lean client and an efficient method for proof step generation, LeanNavigator efficiently produces new theorems with corresponding proofs. Applying this approach to Mathlib4, we generated 4.7 million theorems totaling 1 billion tokens, surpassing previous datasets by more than an order of magnitude. Using this extensive dataset, we trained an AI model that outperforms the state-of-the-art ReProver model in theorem-proving tasks. These results confirm our hypothesis and demonstrate the critical role of large datasets in improving the performance of automated theorem provers.
Submitted: 16 February, 2025; originally announced March 2025.
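The abstract gives the gist of the exploration loop: proof states form a graph whose edges are tactic applications, and each reachable (state, tactic-path) pair is raw material for a new theorem with its proof. A hedged sketch of that idea, where `client.run_tactic` and `propose_tactics` are hypothetical stand-ins for the paper's interactive Lean client and proof-step generator:

```python
from collections import deque

def explore_proof_states(client, root_state, propose_tactics, max_nodes=10_000):
    """Breadth-first walk over a Lean proof-state graph (a sketch, not
    LeanNavigator's actual algorithm). Nodes are proof states, edges are
    tactic applications."""
    frontier = deque([(root_state, [])])
    visited = {root_state}
    reached = []
    while frontier and len(visited) < max_nodes:
        state, path = frontier.popleft()
        for tactic in propose_tactics(state):
            nxt = client.run_tactic(state, tactic)  # None if the tactic fails
            if nxt is None or nxt in visited:
                continue
            visited.add(nxt)
            reached.append((nxt, path + [tactic]))   # candidate theorem material
            frontier.append((nxt, path + [tactic]))
    # Turning these state/path pairs into theorem statements with proofs
    # is the part the paper's machinery actually handles.
    return reached
```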
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.04772v1-abstract-full').style.display = 'none'; document.getElementById('2503.04772v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2503.01763">arXiv:2503.01763</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2503.01763">pdf</a>, <a href="https://arxiv.org/format/2503.01763">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Retrieval Models Aren&#39;t Tool-Savvy: Benchmarking Tool Retrieval for Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Shi%2C+Z">Zhengliang Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Yuhan Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Yan%2C+L">Lingyong Yan</a>, <a href="/search/cs?searchtype=author&amp;query=Ren%2C+P">Pengjie Ren</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+S">Shuaiqiang Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Yin%2C+D">Dawei Yin</a>, <a href="/search/cs?searchtype=author&amp;query=Ren%2C+Z">Zhaochun Ren</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2503.01763v1-abstract-short" style="display: inline;"> Tool learning aims to augment large language models (LLMs) with diverse tools, enabling them to act as agents for solving practical tasks. Due to the limited context length of tool-using LLMs, adopting information retrieval (IR) models to select useful tools from large toolsets is a critical initial step. However, the performance of IR models in tool retrieval tasks remains underexplored and uncle&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.01763v1-abstract-full').style.display = 'inline'; document.getElementById('2503.01763v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2503.01763v1-abstract-full" style="display: none;"> Tool learning aims to augment large language models (LLMs) with diverse tools, enabling them to act as agents for solving practical tasks. Due to the limited context length of tool-using LLMs, adopting information retrieval (IR) models to select useful tools from large toolsets is a critical initial step. However, the performance of IR models in tool retrieval tasks remains underexplored and unclear. Most tool-use benchmarks simplify this step by manually pre-annotating a small set of relevant tools for each task, which is far from the real-world scenarios. 
Abstract: Tool learning aims to augment large language models (LLMs) with diverse tools, enabling them to act as agents for solving practical tasks. Due to the limited context length of tool-using LLMs, adopting information retrieval (IR) models to select useful tools from large toolsets is a critical initial step. However, the performance of IR models in tool retrieval tasks remains underexplored and unclear. Most tool-use benchmarks simplify this step by manually pre-annotating a small set of relevant tools for each task, which is far from real-world scenarios. In this paper, we propose ToolRet, a heterogeneous tool retrieval benchmark comprising 7.6k diverse retrieval tasks and a corpus of 43k tools, collected from existing datasets. We benchmark six types of models on ToolRet. Surprisingly, even models with strong performance on conventional IR benchmarks exhibit poor performance on ToolRet. This low retrieval quality degrades the task pass rate of tool-use LLMs. As a further step, we contribute a large-scale training dataset with over 200k instances, which substantially optimizes the tool retrieval ability of IR models.
Submitted: 3 March, 2025; originally announced March 2025.

6. arXiv:2502.18353 [pdf, other] (cs.CL, cs.LG)
Title: DBR: Divergence-Based Regularization for Debiasing Natural Language Understanding Models
Authors: Zihao Li, Ruixiang Tang, Lu Cheng, Shuaiqiang Wang, Dawei Yin, Mengnan Du
Abstract: Pre-trained language models (PLMs) have achieved impressive results on various natural language processing tasks. However, recent research has revealed that these models often rely on superficial features and shortcuts instead of developing a genuine understanding of language, especially for natural language understanding (NLU) tasks. Consequently, the models struggle to generalize to out-of-domain data. In this work, we propose Divergence-Based Regularization (DBR) to mitigate this shortcut learning behavior. Our method measures the divergence between the output distributions for original examples and examples where shortcut tokens have been masked. This process prevents the model's predictions from being overly influenced by shortcut features or biases. We evaluate our model on three NLU tasks and find that it improves out-of-domain performance with little loss of in-domain accuracy. Our results demonstrate that reducing the reliance on shortcuts and superficial features can enhance the generalization ability of large pre-trained language models.
Submitted: 25 February, 2025; originally announced February 2025.
Comments: Accepted by SIGKDD Explorations
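The regularizer as described is straightforward to sketch. A minimal illustration, assuming logits from two forward passes (original input and a shortcut-masked copy) and a symmetric KL as the divergence; the abstract does not specify the exact divergence or weighting the paper uses.

```python
import torch
import torch.nn.functional as F

def dbr_regularizer(logits_original, logits_masked):
    """Sketch of a divergence-based regularizer in the spirit of DBR:
    penalize the gap between the model's predictive distributions on the
    original input and on the input with shortcut tokens masked."""
    p = F.log_softmax(logits_original, dim=-1)
    q = F.log_softmax(logits_masked, dim=-1)
    # Symmetric KL between the two output distributions (an assumption;
    # the paper's divergence choice may differ).
    return 0.5 * (F.kl_div(q, p, log_target=True, reduction="batchmean")
                  + F.kl_div(p, q, log_target=True, reduction="batchmean"))

# The total objective would then be task_loss + lambda * dbr_regularizer(...),
# with lambda a tuning knob.
```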
7. arXiv:2502.15693 [pdf, other] (cs.IR, cs.AI, cs.LG)
Title: Hgformer: Hyperbolic Graph Transformer for Recommendation
Authors: Xin Yang, Xingrun Li, Heng Chang, Jinze Yang, Xihong Yang, Shengyu Tao, Ningkang Chang, Maiko Shigeno, Junfeng Wang, Dawei Yin, Erxue Min
Abstract: The cold start problem is a challenging problem faced by most modern recommender systems. By leveraging knowledge from other domains, cross-domain recommendation can be an effective method to alleviate the cold start problem. However, the modelling distortion for long-tail data, which is widely present in recommender systems, is often overlooked in cross-domain recommendation. In this research, we propose a hyperbolic manifold based cross-domain collaborative filtering model using BiTGCF as the base model. We introduce the hyperbolic manifold and construct new propagation and transfer layers to address these challenges. The significant performance improvements across various datasets compared to the baseline models demonstrate the effectiveness of our proposed model.
Submitted: 30 December, 2024; originally announced February 2025.

8. arXiv:2502.12970 [pdf, other] (cs.CL)
Title: Reasoning-to-Defend: Safety-Aware Reasoning Can Defend Large Language Models from Jailbreaking
Authors: Junda Zhu, Lingyong Yan, Shuaiqiang Wang, Dawei Yin, Lei Sha
Abstract: The reasoning abilities of Large Language Models (LLMs) have demonstrated remarkable advancement and exceptional performance across diverse domains. However, leveraging these reasoning capabilities to enhance LLM safety against adversarial attacks and jailbreak queries remains largely unexplored. To bridge this gap, we propose Reasoning-to-Defend (R2D), a novel training paradigm that integrates safety reflections on queries and responses into LLMs' generation process, unlocking a safety-aware reasoning mechanism. This approach enables self-evaluation at each reasoning step to create safety pivot tokens as indicators of the response's safety status. Furthermore, in order to improve the learning efficiency of pivot token prediction, we propose Contrastive Pivot Optimization (CPO), which enhances the model's ability to perceive the safety status of dialogues. Through this mechanism, LLMs dynamically adjust their response strategies during reasoning, significantly enhancing their defense capabilities against jailbreak attacks. Extensive experimental results demonstrate that R2D effectively mitigates various attacks and improves overall safety, highlighting the substantial potential of safety-aware reasoning in strengthening LLMs' robustness against jailbreaks.
Submitted: 18 February, 2025; originally announced February 2025.
Comments: 18 pages
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">18 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11414">arXiv:2502.11414</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.11414">pdf</a>, <a href="https://arxiv.org/format/2502.11414">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3701716.3715458">10.1145/3701716.3715458 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Unbiased Learning to Rank with Query-Level Click Propensity Estimation: Beyond Pointwise Observation and Relevance </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yu%2C+L">Lulu Yu</a>, <a href="/search/cs?searchtype=author&amp;query=Bi%2C+K">Keping Bi</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+J">Jiafeng Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+S">Shihao Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Yin%2C+D">Dawei Yin</a>, <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+X">Xueqi Cheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.11414v2-abstract-short" style="display: inline;"> Most existing unbiased learning-to-rank (ULTR) approaches are based on the user examination hypothesis, which assumes that users will click a result only if it is both relevant and observed (typically modeled by position). However, in real-world scenarios, users often click only one or two results after examining multiple relevant options, due to limited patience or because their information needs&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11414v2-abstract-full').style.display = 'inline'; document.getElementById('2502.11414v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11414v2-abstract-full" style="display: none;"> Most existing unbiased learning-to-rank (ULTR) approaches are based on the user examination hypothesis, which assumes that users will click a result only if it is both relevant and observed (typically modeled by position). However, in real-world scenarios, users often click only one or two results after examining multiple relevant options, due to limited patience or because their information needs have already been satisfied. Motivated by this, we propose a query-level click propensity model to capture the probability that users will click on different result lists, allowing for non-zero probabilities that users may not click on an observed relevant result. We hypothesize that this propensity increases when more potentially relevant results are present, and refer to this user behavior as relevance saturation bias. 
Our method introduces a Dual Inverse Propensity Weighting (DualIPW) mechanism -- combining query-level and position-level IPW -- to address both relevance saturation and position bias. Through theoretical derivation, we prove that DualIPW can learn an unbiased ranking model. Experiments on the real-world Baidu-ULTR dataset demonstrate that our approach significantly outperforms state-of-the-art ULTR baselines. The code and dataset information can be found at https://github.com/Trustworthy-Information-Access/DualIPW. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11414v2-abstract-full').style.display = 'none'; document.getElementById('2502.11414v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, 3 figures, accepted by The ACM Web Conference (WWW) 2025 Short Paper Track</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11387">arXiv:2502.11387</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.11387">pdf</a>, <a href="https://arxiv.org/format/2502.11387">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> RoleMRC: A Fine-Grained Composite Benchmark for Role-Playing and Instruction-Following </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lu%2C+J">Junru Lu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jiazheng Li</a>, <a href="/search/cs?searchtype=author&amp;query=Shen%2C+G">Guodong Shen</a>, <a href="/search/cs?searchtype=author&amp;query=Gui%2C+L">Lin Gui</a>, <a href="/search/cs?searchtype=author&amp;query=An%2C+S">Siyu An</a>, <a href="/search/cs?searchtype=author&amp;query=He%2C+Y">Yulan He</a>, <a href="/search/cs?searchtype=author&amp;query=Yin%2C+D">Di Yin</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+X">Xing Sun</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.11387v1-abstract-short" style="display: inline;"> Role-playing is important for Large Language Models (LLMs) to follow diverse instructions while maintaining role identity and the role&#39;s pre-defined ability limits. Existing role-playing datasets mostly contribute to controlling role style and knowledge boundaries, but overlook role-playing in instruction-following scenarios. 
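As a rough illustration of the dual weighting idea (not the paper's actual estimator or proof), a pointwise click loss where each example is reweighted by the product of a position-level examination propensity and a query-level click propensity:

```python
import torch
import torch.nn.functional as F

def dual_ipw_loss(scores, clicks, pos_propensity, query_propensity):
    """Sketch of DualIPW-style reweighting. `scores` are predicted
    relevance logits, `clicks` are 0/1 labels, and both propensities are
    assumed to be estimated elsewhere. Weighting by the inverse of both
    propensities aims to keep examined-but-unclicked relevant results
    (relevance saturation) from biasing the ranker downward."""
    weight = 1.0 / (pos_propensity * query_propensity)
    return F.binary_cross_entropy_with_logits(scores, clicks.float(),
                                              weight=weight)
```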
10. arXiv:2502.11387 [pdf, other] (cs.CL)
Title: RoleMRC: A Fine-Grained Composite Benchmark for Role-Playing and Instruction-Following
Authors: Junru Lu, Jiazheng Li, Guodong Shen, Lin Gui, Siyu An, Yulan He, Di Yin, Xing Sun
Abstract: Role-playing is important for Large Language Models (LLMs) to follow diverse instructions while maintaining role identity and the role's pre-defined ability limits. Existing role-playing datasets mostly contribute to controlling role style and knowledge boundaries, but overlook role-playing in instruction-following scenarios. We introduce a fine-grained role-playing and instruction-following composite benchmark, named RoleMRC, including: (1) multi-turn dialogues between ideal roles and humans, including free chats or discussions upon given passages; (2) role-playing machine reading comprehension, involving response, refusal, and attempts according to passage answerability and role ability; (3) more complex scenarios with nested, multi-turn and prioritized instructions. The final RoleMRC features a 10.2k role profile meta-pool, 37.9k well-synthesized role-playing instructions, and 1.4k testing samples. We develop a pipeline to quantitatively evaluate the fine-grained role-playing and instruction-following capabilities of several mainstream LLMs, as well as models that are fine-tuned on our data. Moreover, cross-evaluation on external role-playing datasets confirms that models fine-tuned on RoleMRC enhance instruction-following without compromising general role-playing and reasoning capabilities. We also probe the neural-level activation maps of different capabilities over post-tuned LLMs. Access to our RoleMRC, RoleMRC-mix and codes: https://github.com/LuJunru/RoleMRC.
Submitted: 16 February, 2025; originally announced February 2025.
11. arXiv:2502.11177 [pdf, other] (cs.CL)
Title: The Mirage of Model Editing: Revisiting Evaluation in the Wild
Authors: Wanli Yang, Fei Sun, Jiajun Tan, Xinyu Ma, Qi Cao, Dawei Yin, Huawei Shen, Xueqi Cheng
Abstract: Despite near-perfect results in artificial evaluations, the effectiveness of model editing in real-world applications remains unexplored. To bridge this gap, we propose to study model editing in question answering (QA) by establishing a rigorous evaluation practice to assess the effectiveness of editing methods in correcting LLMs' errors. It consists of QAEdit, a new benchmark derived from popular QA datasets, and a standardized evaluation framework. Our single editing experiments indicate that current editing methods perform substantially worse than previously reported (38.5% vs. ~96%). Through module analysis and controlled experiments, we demonstrate that this performance decline stems from issues in the evaluation practices of prior editing research. One key issue is the inappropriate use of teacher forcing in testing, which prevents error propagation by feeding ground-truth tokens (inaccessible in real-world scenarios) as input. Furthermore, we simulate real-world deployment by sequential editing, revealing that current approaches fail drastically with only 1000 edits. Our analysis provides a fundamental reexamination of both the real-world applicability of existing model editing methods and their evaluation practices, and establishes a rigorous evaluation framework with key insights to advance reliable and practical model editing research.
Submitted: 23 February, 2025; v1 submitted 16 February, 2025; originally announced February 2025.
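The teacher-forcing pitfall the authors identify is easy to see in code. A sketch, assuming a Hugging Face-style causal LM interface: under teacher forcing the gold prefix is always fed back, so per-token accuracy can look near-perfect even when free-running generation would derail after the first wrong token.

```python
import torch

@torch.no_grad()
def teacher_forced_match(model, prompt_ids, answer_ids):
    # Teacher-forced protocol: the *gold* prefix is fed at every step,
    # so one wrong token cannot derail the rest. This is the inflated
    # setup the paper criticizes.
    logits = model(torch.cat([prompt_ids, answer_ids], dim=-1)).logits
    preds = logits[:, prompt_ids.size(-1) - 1:-1].argmax(-1)
    return (preds == answer_ids).all()

@torch.no_grad()
def free_running_match(model, prompt_ids, answer_ids):
    # Deployment-like protocol: the model consumes its own outputs, so
    # early mistakes propagate. Assumes no early EOS for simplicity.
    out = model.generate(prompt_ids, max_new_tokens=answer_ids.size(-1),
                         do_sample=False)
    return (out[:, prompt_ids.size(-1):] == answer_ids).all()
```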
arXiv:2502.08346 (https://arxiv.org/abs/2502.08346) [pdf, other]  cs.IR, cs.AI, cs.LG
Title: Graph Foundation Models for Recommendation: A Comprehensive Survey
Authors: Bin Wu, Yihang Wang, Yuanhao Zeng, Jiawei Liu, Jiashu Zhao, Cheng Yang, Yawen Li, Long Xia, Dawei Yin, Chuan Shi
Abstract: Recommender systems (RS) serve as a fundamental tool for navigating the vast expanse of online information, with deep learning advancements playing an increasingly important role in improving ranking accuracy. Among these, graph neural networks (GNNs) excel at extracting higher-order structural information, while large language models (LLMs) are designed to process and comprehend natural language, making both approaches highly effective and widely adopted. Recent research has focused on graph foundation models (GFMs), which integrate the strengths of GNNs and LLMs to model complex RS problems more efficiently by leveraging the graph-based structure of user-item relationships alongside textual understanding. In this survey, we provide a comprehensive overview of GFM-based RS technologies by introducing a clear taxonomy of current approaches, diving into methodological details, and highlighting key challenges and future directions. By synthesizing recent advancements, we aim to offer valuable insights into the evolving landscape of GFM-based recommender systems.
Submitted: 16 February, 2025; v1 submitted 12 February, 2025; originally announced February 2025.
arXiv:2502.05924 (https://arxiv.org/abs/2502.05924) [pdf, other]  cs.CV, cs.IR
Title: Multi-Branch Collaborative Learning Network for Video Quality Assessment in Industrial Video Search
Authors: Hengzhu Tang, Zefeng Zhang, Zhiping Li, Zhenyu Zhang, Xing Wu, Li Gao, Suqi Cheng, Dawei Yin
Abstract: Video Quality Assessment (VQA) is vital for large-scale video retrieval systems, aimed at identifying quality issues to prioritize high-quality videos. In industrial systems, low-quality video characteristics fall into four categories: visual-related issues like mosaics and black boxes, textual issues from video titles and OCR content, and semantic issues like frame incoherence and frame-text mismatch from AI-generated videos. Despite their prevalence in industrial settings, these low-quality videos have been largely overlooked in academic research, posing a challenge for accurate identification. To address this, we introduce the Multi-Branch Collaborative Network (MBCN), tailored for industrial video retrieval systems. MBCN features four branches, each designed to tackle one of the aforementioned quality issues. After each branch independently scores videos, we aggregate these scores using a weighted approach and a squeeze-and-excitation mechanism to dynamically address quality issues across different scenarios. We implement point-wise and pair-wise optimization objectives to ensure score stability and reasonableness. Extensive offline and online experiments on a world-level video search engine demonstrate MBCN's effectiveness in identifying video quality issues, significantly enhancing the retrieval system's ranking performance. Detailed experimental analyses confirm the positive contribution of all four evaluation branches. Furthermore, MBCN significantly improves recognition accuracy for low-quality AI-generated videos compared to the baseline.
Submitted: 9 February, 2025; originally announced February 2025.
Comments: KDD 2025 ADS
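A hedged sketch of the aggregation step the abstract outlines: four per-branch scores combined through a squeeze-and-excitation-style gate that produces dynamic per-branch weights. Branch names, dimensions, and the gate layout are assumptions, not MBCN's actual implementation:

```python
# Illustrative only: weighted aggregation of branch scores with an SE-style gate.
import torch
import torch.nn as nn

class ScoreAggregator(nn.Module):
    def __init__(self, n_branches: int = 4, reduction: int = 2):
        super().__init__()
        # "Squeeze" the branch scores into a bottleneck, then "excite"
        # per-branch weights in (0, 1).
        self.gate = nn.Sequential(
            nn.Linear(n_branches, n_branches // reduction),
            nn.ReLU(),
            nn.Linear(n_branches // reduction, n_branches),
            nn.Sigmoid(),
        )

    def forward(self, branch_scores: torch.Tensor) -> torch.Tensor:
        # branch_scores: (batch, 4) = visual, title-text, OCR-text, semantic (assumed order)
        weights = self.gate(branch_scores)            # dynamic per-video weights
        return (weights * branch_scores).sum(dim=-1)  # final quality score

scores = torch.rand(8, 4)                # fake scores for 8 videos
print(ScoreAggregator()(scores).shape)   # torch.Size([8])
```

The gate lets the network emphasize, say, the semantic branch for AI-generated videos and the visual branch for mosaic artifacts, which matches the "different scenarios" motivation above.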
arXiv:2502.05690 (https://arxiv.org/abs/2502.05690) [pdf, other]  cs.AI, econ.GN
Title: Managing Geological Uncertainty in Critical Mineral Supply Chains: A POMDP Approach with Application to U.S. Lithium Resources
Authors: Mansur Arief, Yasmine Alonso, CJ Oshiro, William Xu, Anthony Corso, David Zhen Yin, Jef K. Caers, Mykel J. Kochenderfer
Abstract: The world is entering an unprecedented period of critical mineral demand, driven by the global transition to renewable energy technologies and electric vehicles. This transition presents unique challenges in mineral resource development, particularly due to geological uncertainty, a key characteristic that traditional supply chain optimization approaches do not adequately address. To tackle this challenge, we propose a novel application of Partially Observable Markov Decision Processes (POMDPs) that optimizes critical mineral sourcing decisions while explicitly accounting for the dynamic nature of geological uncertainty. Through a case study of the U.S. lithium supply chain, we demonstrate that POMDP-based policies achieve superior outcomes compared to traditional approaches, especially when initial reserve estimates are imperfect. Our framework provides quantitative insights for balancing domestic resource development with international supply diversification, offering policymakers a systematic approach to strategic decision-making in critical mineral supply chains.
Submitted: 8 February, 2025; originally announced February 2025.
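A toy sketch of the core POMDP ingredient the abstract relies on: maintaining a belief over the hidden reserve size and updating it by Bayes' rule as survey observations arrive. States, probabilities, and the observation model below are invented for illustration:

```python
# Hypothetical belief update over unobserved reserve size; not the paper's model.

RESERVE_STATES = ["low", "medium", "high"]

# P(observation | true reserve state) for a made-up survey result "rich sample"
OBS_LIKELIHOOD = {"low": 0.1, "medium": 0.4, "high": 0.8}

def belief_update(belief, obs_likelihood):
    """Bayes rule: posterior is proportional to likelihood times prior."""
    unnorm = {s: obs_likelihood[s] * belief[s] for s in belief}
    z = sum(unnorm.values())
    return {s: p / z for s, p in unnorm.items()}

# An imperfect initial reserve estimate is just a flat prior here.
belief = {"low": 1 / 3, "medium": 1 / 3, "high": 1 / 3}
belief = belief_update(belief, OBS_LIKELIHOOD)
print(belief)  # probability mass shifts toward "high" after a rich sample
```

A POMDP policy then chooses sourcing actions against this belief rather than against a single point estimate, which is why it can outperform deterministic plans when the initial estimate is wrong.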
arXiv:2502.02584 (https://arxiv.org/abs/2502.02584) [pdf, other]  cs.LG, cs.AI
Title: QLASS: Boosting Language Agent Inference via Q-Guided Stepwise Search
Authors: Zongyu Lin, Yao Tang, Xingcheng Yao, Da Yin, Ziniu Hu, Yizhou Sun, Kai-Wei Chang
Abstract: Language agents have become a promising solution to complex interactive tasks. One of the key ingredients to the success of language agents is the reward model on the trajectory of the agentic workflow, which provides valuable guidance during training or inference. However, due to the lack of annotations of intermediate interactions, most existing works use an outcome reward model to optimize policies across entire trajectories. This may lead to sub-optimal policies and hinder the overall performance. To address this, we propose QLASS (Q-guided Language Agent Stepwise Search), to automatically generate annotations by estimating Q-values in a stepwise manner for open language agents. By introducing a reasoning tree and performing process reward modeling, QLASS provides effective intermediate guidance for each step. With the stepwise guidance, we propose a Q-guided generation strategy to enable language agents to better adapt to long-term value, resulting in significant performance improvement during model inference on complex interactive agent tasks. Notably, even with almost half the annotated data, QLASS retains strong performance, demonstrating its efficiency in handling limited supervision. We also empirically demonstrate that QLASS can lead to more effective decision making through qualitative analysis. We will release our code and data.
Submitted: 4 February, 2025; originally announced February 2025.
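A hedged sketch of Q-guided generation as the abstract describes it: at each step, sample candidate actions and keep the one a learned Q-estimator scores highest. `propose_actions` and `q_value` are hypothetical stand-ins for the agent's sampler and the trained Q model:

```python
# Toy Q-guided rollout; names and environment are invented for illustration.
import random

def propose_actions(state, k=4):
    return [f"{state}->a{i}" for i in range(k)]   # placeholder candidate actions

def q_value(state, action):
    return random.random()                        # stands in for a trained Q estimator

def q_guided_rollout(state, max_steps=3):
    trajectory = []
    for _ in range(max_steps):
        candidates = propose_actions(state)
        action = max(candidates, key=lambda a: q_value(state, a))  # greedy w.r.t. Q
        trajectory.append(action)
        state = action                            # toy environment transition
    return trajectory

print(q_guided_rollout("start"))
```

The point of the stepwise Q-values is visible even in this toy: the agent is steered at every intermediate step instead of only being scored on the final outcome.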
arXiv:2502.01549 (https://arxiv.org/abs/2502.01549) [pdf, other]  cs.IR, cs.AI, cs.CV
Title: VideoRAG: Retrieval-Augmented Generation with Extreme Long-Context Videos
Authors: Xubin Ren, Lingrui Xu, Long Xia, Shuaiqiang Wang, Dawei Yin, Chao Huang
Abstract: Retrieval-Augmented Generation (RAG) has demonstrated remarkable success in enhancing Large Language Models (LLMs) through external knowledge integration, yet its application has primarily focused on textual content, leaving the rich domain of multi-modal video knowledge predominantly unexplored. This paper introduces VideoRAG, the first retrieval-augmented generation framework specifically designed for processing and understanding extremely long-context videos. Our core innovation lies in its dual-channel architecture that seamlessly integrates (i) graph-based textual knowledge grounding for capturing cross-video semantic relationships, and (ii) multi-modal context encoding for efficiently preserving visual features. This novel design empowers VideoRAG to process unlimited-length videos by constructing precise knowledge graphs that span multiple videos while maintaining semantic dependencies through specialized multi-modal retrieval paradigms. Through comprehensive empirical evaluation on our proposed LongerVideos benchmark, comprising over 160 videos totaling 134+ hours across lecture, documentary, and entertainment categories, VideoRAG demonstrates substantial performance gains compared to existing RAG alternatives and long video understanding methods. The source code of the VideoRAG implementation and the benchmark dataset are openly available at: https://github.com/HKUDS/VideoRAG.
Submitted: 3 February, 2025; originally announced February 2025.
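A loose sketch of the dual-channel idea, under heavy assumptions: combine hits from a graph-grounded textual index with nearest neighbours from a visual-embedding index, then hand both to the generator. The data structures below are toy stand-ins, not VideoRAG's actual indexes:

```python
# Illustrative dual-channel retrieval; everything here is a simplification.

def retrieve_dual_channel(query, kg_index, visual_index, top_k=3):
    # Channel 1: graph-based textual knowledge (entity -> passage edges)
    text_hits = [passage for entity, passage in kg_index if entity in query][:top_k]
    # Channel 2: visual embeddings, ranked by a precomputed similarity score
    vis_hits = sorted(visual_index, key=lambda pair: -pair[0])[:top_k]
    return text_hits + [clip for _, clip in vis_hits]

kg_index = [("transformer", "lecture-3: attention recap"),
            ("rag", "doc-1: RAG introduction")]
visual_index = [(0.9, "clip_017.mp4"), (0.4, "clip_002.mp4")]
print(retrieve_dual_channel("what is rag", kg_index, visual_index))
```

Keeping the two channels separate until the final fusion is what lets the graph side track cross-video semantics while the embedding side preserves raw visual evidence.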
arXiv:2501.15228 (https://arxiv.org/abs/2501.15228) [pdf, other]  cs.CL, cs.IR
Title: Improving Retrieval-Augmented Generation through Multi-Agent Reinforcement Learning
Authors: Yiqun Chen, Lingyong Yan, Weiwei Sun, Xinyu Ma, Yi Zhang, Shuaiqiang Wang, Dawei Yin, Yiming Yang, Jiaxin Mao
Abstract: Retrieval-augmented generation (RAG) is extensively utilized to incorporate external, current knowledge into large language models, thereby minimizing hallucinations. A standard RAG pipeline may comprise several components, such as query rewriting, document retrieval, document filtering, and answer generation. However, these components are typically optimized separately through supervised fine-tuning, which can lead to misalignments between the objectives of individual modules and the overarching aim of generating accurate answers in question-answering (QA) tasks. Although recent efforts have explored reinforcement learning (RL) to optimize specific RAG components, these approaches often focus on overly simplistic pipelines with only two components or do not adequately address the complex interdependencies and collaborative interactions among the modules. To overcome these challenges, we propose treating the RAG pipeline as a multi-agent cooperative task, with each component regarded as an RL agent. Specifically, we present MMOA-RAG, a Multi-Module joint Optimization Algorithm for RAG, which employs multi-agent reinforcement learning to harmonize all agents' goals towards a unified reward, such as the F1 score of the final answer. Experiments conducted on various QA datasets demonstrate that MMOA-RAG improves the overall pipeline performance and outperforms existing baselines. Furthermore, comprehensive ablation studies validate the contributions of individual components and the adaptability of MMOA-RAG across different RAG components and datasets. The code of MMOA-RAG is available at https://github.com/chenyiqun/MMOA-RAG.
Submitted: 25 January, 2025; originally announced January 2025.
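A toy sketch of the shared-reward idea as the abstract states it: every module (rewriter, retriever, filter, generator) is credited with the same scalar, here a token-level F1 of the final answer. Only the reward computation is shown; the RL training loop is omitted:

```python
# Token-level F1 broadcast to all cooperating agents; a simplification of the
# unified-reward setup described above, not the released training code.
from collections import Counter

def answer_f1(prediction: str, gold: str) -> float:
    p, g = prediction.split(), gold.split()
    overlap = sum((Counter(p) & Counter(g)).values())  # shared token counts
    if overlap == 0:
        return 0.0
    precision, recall = overlap / len(p), overlap / len(g)
    return 2 * precision * recall / (precision + recall)

reward = answer_f1("paris is the capital", "the capital is paris")
shared = {agent: reward for agent in ["rewriter", "retriever", "filter", "generator"]}
print(shared)
```

Because all agents optimize the same number, a retriever that fetches flashy but unhelpful documents gets no credit unless the final answer actually improves, which is exactly the misalignment the abstract targets.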
arXiv:2501.12432 (https://arxiv.org/abs/2501.12432) [pdf, other]  cs.LG, cs.AI, cs.CL
Title: Divide-Then-Aggregate: An Efficient Tool Learning Method via Parallel Tool Invocation
Authors: Dongsheng Zhu, Weixian Shi, Zhengliang Shi, Zhaochun Ren, Shuaiqiang Wang, Lingyong Yan, Dawei Yin
Abstract: Although current Large Language Models (LLMs) exhibit impressive capabilities, performing complex real-world tasks still requires tool learning. Mainstream methods, such as CoT/ReAct, rely on step-by-step tool invocation to interact with external environments, but they are limited in perceptual scope and lack adequate task-planning capability. To address these limitations, other studies introduce the Depth-First Search-based Decision Tree (DFSDT), which still suffers from high computational cost. In this paper, we introduce a novel parallel tool invocation paradigm, DTA-Llama (Divide-Then-Aggregate Llama). First, we transform traditional tree-based tool search paths into a Directed Acyclic Graph (DAG) structure, generating a high-quality parallel tool invocation dataset. DTA-Llama is then trained on this dataset to learn to iteratively divide the current task into several parallel tool invocation sub-tasks and aggregate the invocation results to decide the next actions. Furthermore, we introduce an efficient inference framework, inspired by the process/threads mechanism, for applying DTA-Llama to practical tasks. Experimental results show that our approach substantially enhances task performance while reducing token consumption and inference time. Llama2-7B, using our method, is comparable to the official parallel function calling method of GPT-3.5. The relevant code, dataset, and model weights are available at https://corn0205.github.io/
Submitted: 21 January, 2025; originally announced January 2025.
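An illustrative divide-then-aggregate loop, not the released implementation: sub-tasks whose dependencies are satisfied run in parallel via a thread pool, and their results are aggregated before the next round. The DAG and the tool functions are invented for the example:

```python
# Toy DAG executor in the spirit of parallel tool invocation.
from concurrent.futures import ThreadPoolExecutor

def run_dag(dag, tools):
    """dag: task -> list of prerequisite tasks; tools: task -> callable."""
    done = {}
    while len(done) < len(dag):
        # "Divide": every task whose dependencies are all finished is ready.
        ready = [t for t, deps in dag.items()
                 if t not in done and all(d in done for d in deps)]
        with ThreadPoolExecutor() as pool:
            # Parallel tool invocation for the whole ready set.
            results = list(pool.map(lambda t: (t, tools[t]()), ready))
        done.update(results)        # "Aggregate" before the next planning round
    return done

dag = {"search_a": [], "search_b": [], "summarize": ["search_a", "search_b"]}
tools = {"search_a": lambda: "doc A", "search_b": lambda: "doc B",
         "summarize": lambda: "A+B summary"}
print(run_dag(dag, tools))
```

Compared with step-by-step invocation, the two independent searches here complete in one round instead of two, which is the source of the token and latency savings the abstract reports.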
arXiv:2501.11671 (https://arxiv.org/abs/2501.11671) [pdf, other]  cs.IR
Title: Exploring Preference-Guided Diffusion Model for Cross-Domain Recommendation
Authors: Xiaodong Li, Hengzhu Tang, Jiawei Sheng, Xinghua Zhang, Li Gao, Suqi Cheng, Dawei Yin, Tingwen Liu
Abstract: Cross-domain recommendation (CDR) has been proven a promising way to alleviate the cold-start issue, in which the most critical problem is how to draw an informative user representation in the target domain via the transfer of user preference existing in the source domain. Prior efforts mostly follow the embedding-and-mapping paradigm, which first integrates the preference into the user representation in the source domain and then applies a mapping function to carry this representation to the target domain. However, they focus on mapping features across domains while neglecting to explicitly model the preference integration process, which may lead to learning coarse user representations. Diffusion models (DMs), which contribute to more accurate user/item representations due to their explicit information injection capability, have achieved promising performance in recommendation systems. Nevertheless, these DM-based methods cannot directly account for valuable user preference in other domains, leading to challenges in adapting to the transfer of preference for cold-start users. Consequently, the feasibility of DMs for CDR remains underexplored. To this end, we exploit the explicit information injection capability of DMs for user preference integration and propose a Preference-Guided Diffusion Model for CDR to cold-start users, termed DMCDR. Specifically, we leverage a preference encoder to establish the preference guidance signal from the user's interaction history in the source domain. Then, we explicitly inject the preference guidance signal into the user representation step by step to guide the reverse process, and ultimately generate the personalized user representation in the target domain, thus achieving the transfer of user preference across domains. Furthermore, we comprehensively explore the impact of six DM-based variants on CDR.
Submitted: 20 January, 2025; originally announced January 2025.
Comments: This paper is accepted by KDD'2025
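A loose sketch of the stepwise guidance injection the abstract outlines: at each denoising step of the reverse process, the source-domain preference signal nudges the user representation. The update rule, scales, and step count below are assumptions, not DMCDR's formulation:

```python
# Toy preference-guided reverse process; purely illustrative.
import random

def reverse_process(noisy_repr, preference_signal, steps=5, guide=0.3):
    x = list(noisy_repr)
    for _ in range(steps):
        # Toy denoising step (a real DM would use a learned noise predictor).
        x = [xi - 0.1 * xi + random.gauss(0, 0.01) for xi in x]
        # Inject the preference guidance signal, step by step.
        x = [xi + guide * (p - xi) for xi, p in zip(x, preference_signal)]
    return x

source_preference = [0.8, -0.2, 0.5]   # encoded from source-domain interaction history
start = [random.gauss(0, 1) for _ in range(3)]
print(reverse_process(start, source_preference))
```

Even in this toy, the generated representation is pulled toward the source-domain preference at every step rather than mapped once at the end, which is the contrast with embedding-and-mapping the abstract draws.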
arXiv:2501.11034 (https://arxiv.org/abs/2501.11034) [pdf, other]  cs.IR
Title: Generative Retrieval for Book Search
Authors: Yubao Tang, Ruqing Zhang, Jiafeng Guo, Maarten de Rijke, Shihao Liu, Shuaiqing Wang, Dawei Yin, Xueqi Cheng
Abstract: In book search, relevant book information should be returned in response to a query. Books contain complex, multi-faceted information such as metadata, outlines, and main text, where the outline provides hierarchical information between chapters and sections. Generative retrieval (GR) is a new retrieval paradigm that consolidates corpus information into a single model to generate identifiers of documents that are relevant to a given query. How can GR be applied to book search? Directly applying GR to book search is a challenge due to the unique characteristics of book search: the model needs to retain the complex, multi-faceted information of the book, which increases the demand for labeled data, and splitting book information into a collection of separate segments for learning might result in a loss of hierarchical information. We propose an effective Generative retrieval framework for Book Search (GBS) that features two main components: data augmentation and outline-oriented book encoding. For data augmentation, GBS constructs multiple query-book pairs for training; it constructs multiple book identifiers based on the outline and various forms of book contents, and simulates real book retrieval scenarios with varied pseudo-queries. This includes coverage-promoting book identifier augmentation, allowing the model to learn to index effectively, and diversity-enhanced query augmentation, allowing the model to learn to retrieve effectively. Outline-oriented book encoding improves length extrapolation through bi-level positional encoding and retentive attention mechanisms to maintain context over long sequences. Experiments on a proprietary Baidu dataset demonstrate that GBS outperforms strong baselines, achieving a 9.8% improvement in MRR@20 over the state-of-the-art RIPOR method...
Submitted: 19 January, 2025; originally announced January 2025.
Comments: Accepted at KDD ADS 2025

arXiv:2501.02226 (https://arxiv.org/abs/2501.02226) [pdf, other]  cs.IR
Title: Knowledge Graph Retrieval-Augmented Generation for LLM-based Recommendation
Authors: Shijie Wang, Wenqi Fan, Yue Feng, Xinyu Ma, Shuaiqiang Wang, Dawei Yin
Abstract: Recommender systems have become increasingly vital in our daily lives, helping to alleviate the problem of information overload across various user-oriented online services. The emergence of Large Language Models (LLMs) has yielded remarkable achievements, demonstrating their potential for the development of next-generation recommender systems. Despite these advancements, LLM-based recommender systems face inherent limitations stemming from their LLM backbones, particularly issues of hallucinations and the lack of up-to-date and domain-specific knowledge. Recently, Retrieval-Augmented Generation (RAG) has garnered significant attention for addressing these limitations by leveraging external knowledge sources to enhance the understanding and generation of LLMs. However, vanilla RAG methods often introduce noise and neglect structural relationships in knowledge, limiting their effectiveness in LLM-based recommendations. To address these limitations, we propose to retrieve high-quality and up-to-date structured information from the knowledge graph (KG) to augment recommendations. Specifically, our approach develops a retrieval-augmented framework, termed K-RagRec, that facilitates the recommendation generation process by incorporating structure information from the external KG. Extensive experiments have been conducted to demonstrate the effectiveness of our proposed method.
Submitted: 4 January, 2025; originally announced January 2025.
Comments: Preprint. Under review
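A minimal sketch of the K-RagRec idea as summarized above: retrieve structured facts about candidate items from a knowledge graph and prepend them to the recommendation prompt. The triples and prompt format are illustrative only, not the paper's pipeline:

```python
# Toy KG-augmented recommendation prompt; all data is made up.

KG = {
    "Dune": [("Dune", "genre", "sci-fi"), ("Dune", "director", "Denis Villeneuve")],
    "Heat": [("Heat", "genre", "crime"), ("Heat", "director", "Michael Mann")],
}

def build_prompt(user_history, candidates):
    # Pull structured, up-to-date facts for each candidate item.
    facts = [" ".join(triple) for c in candidates for triple in KG.get(c, [])]
    return ("Known facts:\n" + "\n".join(facts) +
            f"\nUser watched: {', '.join(user_history)}" +
            "\nWhich candidate should be recommended and why?")

print(build_prompt(["Blade Runner 2049"], ["Dune", "Heat"]))
```

Grounding the prompt in explicit triples rather than free-text snippets is what preserves the structural relationships that vanilla RAG tends to lose.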
arXiv:2412.17847 (https://arxiv.org/abs/2412.17847) [pdf, other]  cs.AI, cs.CL, cs.CY, cs.LG, cs.MM
Title: Bridging the Data Provenance Gap Across Text, Speech and Video
Authors: Shayne Longpre, Nikhil Singh, Manuel Cherep, Kushagra Tiwary, Joanna Materzynska, William Brannon, Robert Mahari, Naana Obeng-Marnu, Manan Dey, Mohammed Hamdy, Nayan Saxena, Ahmad Mustafa Anis, Emad A. Alghamdi, Vu Minh Chien, Da Yin, Kun Qian, Yizhi Li, Minnie Liang, An Dinh, Shrestha Mohanty, Deividas Mataciunas, Tobin South, Jianguo Zhang, Ariel N. Lee, Campbell S. Lund, et al. (18 additional authors not shown)
Abstract: Progress in AI is driven largely by the scale and quality of training data. Despite this, there is a deficit of empirical analysis examining the attributes of well-established datasets beyond text. In this work we conduct the largest and first-of-its-kind longitudinal audit across modalities (popular text, speech, and video datasets) from their detailed sourcing trends and use restrictions to their geographical and linguistic representation. Our manual analysis covers nearly 4000 public datasets from 1990 to 2024, spanning 608 languages, 798 sources, 659 organizations, and 67 countries. We find that multimodal machine learning applications have overwhelmingly turned to web-crawled, synthetic, and social media platforms, such as YouTube, for their training sets, eclipsing all other sources since 2019. Secondly, tracing the chain of dataset derivations, we find that while less than 33% of datasets are restrictively licensed, over 80% of the source content in widely-used text, speech, and video datasets carries non-commercial restrictions. Finally, counter to the rising number of languages and geographies represented in public AI training datasets, our audit demonstrates that measures of relative geographical and multilingual representation have failed to significantly improve their coverage since 2013. We believe the breadth of our audit enables us to empirically examine trends in data sourcing, restrictions, and Western-centricity at an ecosystem level, and that visibility into these questions is essential to progress in responsible AI. As a contribution to ongoing improvements in dataset transparency and responsible use, we release our entire multimodal audit, allowing practitioners to trace data provenance across text, speech, and video.
Submitted: 18 February, 2025; v1 submitted 18 December, 2024; originally announced December 2024.
Comments: ICLR 2025. 10 pages, 5 figures (main paper)
arXiv:2412.14574 (https://arxiv.org/abs/2412.14574) [pdf, other]  cs.IR, cs.CL
Title: Sliding Windows Are Not the End: Exploring Full Ranking with Long-Context Large Language Models
Authors: Wenhan Liu, Xinyu Ma, Yutao Zhu, Ziliang Zhao, Shuaiqiang Wang, Dawei Yin, Zhicheng Dou
Abstract: Large Language Models (LLMs) have shown exciting performance in listwise passage ranking. Due to the limited input length, existing methods often adopt the sliding window strategy. Such a strategy, though effective, is inefficient as it involves repetitive and serialized processing, which usually re-evaluates relevant passages multiple times. As a result, it incurs redundant API costs, which are proportional to the number of inference tokens. The development of long-context LLMs enables the full ranking of all passages within a single inference, avoiding redundant API costs. In this paper, we conduct a comprehensive study of long-context LLMs for ranking tasks in terms of efficiency and effectiveness. Surprisingly, our experiments reveal that full ranking with long-context LLMs can deliver superior performance in the supervised fine-tuning setting with a huge efficiency improvement. Furthermore, we identify two limitations of fine-tuning the full ranking model based on existing methods: (1) the sliding window strategy fails to produce a full ranking list as a training label, and (2) the language modeling loss cannot emphasize top-ranked passage IDs in the label. To alleviate these issues, we propose a new complete listwise label construction approach and a novel importance-aware learning objective for full ranking. Experiments show the superior performance of our method over baselines. Our code is available at https://github.com/8421BCD/fullrank.
Submitted: 19 December, 2024; originally announced December 2024.
Comments: 14 pages
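A hedged sketch of an importance-aware objective in the spirit of the abstract: a weighted cross-entropy that up-weights the top-ranked passage IDs in the target list. The 1/(1+rank) decay is an assumption for illustration, not the paper's exact schedule:

```python
# Illustrative importance-aware loss; not the released training objective.
import torch
import torch.nn.functional as F

def importance_aware_loss(logits, target_ids):
    # logits: (list_len, vocab) scores for each ranked slot
    # target_ids: (list_len,) gold passage-ID tokens, best passage first
    per_token = F.cross_entropy(logits, target_ids, reduction="none")
    ranks = torch.arange(len(target_ids), dtype=torch.float)
    weights = 1.0 / (1.0 + ranks)          # rank 0 (top passage) matters most
    return (weights * per_token).sum() / weights.sum()

logits = torch.randn(10, 100)              # 10 ranked slots, 100 candidate IDs
target = torch.randint(0, 100, (10,))
print(importance_aware_loss(logits, target))
```

A plain language-modeling loss would weight every slot equally, so an error on the 10th passage costs as much as one on the 1st; the weighting above restores the asymmetry that ranking metrics like MRR actually measure.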
arXiv:2412.14510 (https://arxiv.org/abs/2412.14510) [pdf, other]  cs.CL, cs.AI
Title: PA-RAG: RAG Alignment via Multi-Perspective Preference Optimization
Authors: Jiayi Wu, Hengyi Cai, Lingyong Yan, Hao Sun, Xiang Li, Shuaiqiang Wang, Dawei Yin, Ming Gao
Abstract: The emergence of Retrieval-Augmented Generation (RAG) has alleviated the issues of outdated and hallucinatory content in the generation of large language models (LLMs), yet it still reveals numerous limitations. When a general-purpose LLM serves as the RAG generator, it often suffers from inadequate response informativeness, response robustness, and citation quality. Past approaches to tackling these limitations, either by incorporating additional steps beyond generating responses or by optimizing the generator through supervised fine-tuning (SFT), still fail to align fully with RAG requirements. Consequently, optimizing the RAG generator from multiple preference perspectives while maintaining its end-to-end LLM form remains a challenge. To bridge this gap, we propose Multiple Perspective Preference Alignment for Retrieval-Augmented Generation (PA-RAG), a method for optimizing the generator of RAG systems to align with RAG requirements comprehensively. Specifically, we construct high-quality instruction fine-tuning data and multi-perspective preference data by sampling responses of varied quality from the generator across scenarios with different document quality. Subsequently, we optimize the generator using SFT and Direct Preference Optimization (DPO). Extensive experiments conducted on four question-answering datasets across three LLMs demonstrate that PA-RAG can significantly enhance the performance of RAG generators. Our code and datasets are available at https://github.com/wujwyi/PA-RAG.
Submitted: 18 December, 2024; originally announced December 2024.
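A toy sketch of multi-perspective preference-pair construction as the abstract describes it: sample responses under varied document-quality conditions, score them along the three named perspectives, and pair a better response with a worse one for DPO. The scoring and data are invented:

```python
# Illustrative preference-pair construction; not PA-RAG's released pipeline.

def build_preference_pair(samples):
    """samples: (response, informativeness, robustness, citation_quality) tuples,
    each score assumed to be in [0, 1] from some upstream judge."""
    scored = sorted(samples, key=lambda s: s[1] + s[2] + s[3], reverse=True)
    # chosen = best overall across the three perspectives, rejected = worst
    return {"chosen": scored[0][0], "rejected": scored[-1][0]}

samples = [("answer grounded in docs, with citations", 0.9, 0.8, 0.9),
           ("terse answer, no sources",                0.4, 0.6, 0.1)]
print(build_preference_pair(samples))
```

Feeding such pairs to a standard DPO trainer keeps the generator an end-to-end LLM, which is the constraint the abstract emphasizes over pipeline-style fixes.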
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.06483">arXiv:2412.06483</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.06483">pdf</a>, <a href="https://arxiv.org/format/2412.06483">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> SafeWorld: Geo-Diverse Safety Alignment </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yin%2C+D">Da Yin</a>, <a href="/search/cs?searchtype=author&amp;query=Qiu%2C+H">Haoyi Qiu</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+K">Kung-Hsiang Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Chang%2C+K">Kai-Wei Chang</a>, <a href="/search/cs?searchtype=author&amp;query=Peng%2C+N">Nanyun Peng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.06483v1-abstract-short" style="display: inline;"> In the rapidly evolving field of Large Language Models (LLMs), ensuring safety is a crucial and widely discussed topic. However, existing works often overlook the geo-diversity of cultural and legal standards across the world. To demonstrate the challenges posed by geo-diverse safety standards, we introduce SafeWorld, a novel benchmark specifically designed to evaluate LLMs&#39; ability to generate re&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.06483v1-abstract-full').style.display = 'inline'; document.getElementById('2412.06483v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.06483v1-abstract-full" style="display: none;"> In the rapidly evolving field of Large Language Models (LLMs), ensuring safety is a crucial and widely discussed topic. However, existing works often overlook the geo-diversity of cultural and legal standards across the world. To demonstrate the challenges posed by geo-diverse safety standards, we introduce SafeWorld, a novel benchmark specifically designed to evaluate LLMs&#39; ability to generate responses that are not only helpful but also culturally sensitive and legally compliant across diverse global contexts. SafeWorld encompasses 2,342 test user queries, each grounded in high-quality, human-verified cultural norms and legal policies from 50 countries and 493 regions/races. On top of it, we propose a multi-dimensional automatic safety evaluation framework that assesses the contextual appropriateness, accuracy, and comprehensiveness of responses. Our evaluations reveal that current LLMs struggle to meet these criteria. To enhance LLMs&#39; alignment with geo-diverse safety standards, we synthesize helpful preference pairs for Direct Preference Optimization (DPO) alignment training. The preference pair construction aims to encourage LLMs to behave appropriately and provide precise references to relevant cultural norms and policies when necessary. Our trained SafeWorldLM outperforms all competing models, including GPT-4o on all three evaluation dimensions by a large margin. 
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.02172">arXiv:2412.02172</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.02172">pdf</a>, <a href="https://arxiv.org/format/2412.02172">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> VISCO: Benchmarking Fine-Grained Critique and Correction Towards Self-Improvement in Visual Reasoning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wu%2C+X">Xueqing Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Ding%2C+Y">Yuheng Ding</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+B">Bingxuan Li</a>, <a href="/search/cs?searchtype=author&amp;query=Lu%2C+P">Pan Lu</a>, <a href="/search/cs?searchtype=author&amp;query=Yin%2C+D">Da Yin</a>, <a href="/search/cs?searchtype=author&amp;query=Chang%2C+K">Kai-Wei Chang</a>, <a href="/search/cs?searchtype=author&amp;query=Peng%2C+N">Nanyun Peng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2412.02172v2-abstract-full"> The ability of large vision-language models (LVLMs) to critique and correct their reasoning is an essential building block towards their self-improvement. However, a systematic analysis of such capabilities in LVLMs is still lacking.
We propose VISCO, the first benchmark to extensively analyze the fine-grained critique and correction capabilities of LVLMs. Compared to existing work that uses a single scalar value to critique the entire reasoning, VISCO features dense and fine-grained critique, requiring LVLMs to evaluate the correctness of each step in the chain-of-thought and provide natural language explanations to support their judgments. Extensive evaluation of 24 LVLMs demonstrates that human-written critiques significantly enhance performance after correction, showcasing the potential of the self-improvement strategy. However, model-generated critiques are less helpful and sometimes detrimental to performance, suggesting that critique is the crucial bottleneck. We identify three common patterns in critique failures: failure to critique visual perception, reluctance to &#34;say no&#34;, and exaggerated assumption of error propagation. To address these issues, we propose an effective LookBack strategy that revisits the image to verify each piece of information in the initial reasoning. LookBack significantly improves critique and correction performance by up to 13.5%. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">CVPR 2025. https://visco-benchmark.github.io/</span> </p>
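<p class="is-size-7">Dense critique of the kind VISCO calls for is scored per step rather than with one scalar per chain; the following is a hedged illustration of such a step-level metric. The record layout and the error-as-positive F1 are assumptions for exposition, not the benchmark's official scorer.</p> <pre><code class="language-python">
from dataclasses import dataclass

@dataclass
class StepCritique:
    step_text: str      # one chain-of-thought step
    is_correct: bool    # critic's verdict for this step
    explanation: str    # natural-language justification the critic must give

def stepwise_f1(predicted, gold):
    """F1 over per-step error detection, treating 'step is wrong' as the
    positive class; gold[i] is True when step i is actually correct."""
    tp = sum(1 for p, g in zip(predicted, gold) if not p.is_correct and not g)
    fp = sum(1 for p, g in zip(predicted, gold) if not p.is_correct and g)
    fn = sum(1 for p, g in zip(predicted, gold) if p.is_correct and not g)
    if tp == 0:
        return 0.0
    precision, recall = tp / (tp + fp), tp / (tp + fn)
    return 2 * precision * recall / (precision + recall)
</code></pre>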
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.18651">arXiv:2411.18651</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.18651">pdf</a>, <a href="https://arxiv.org/format/2411.18651">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Verbalized Representation Learning for Interpretable Few-Shot Generalization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yang%2C+C">Cheng-Fu Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Yin%2C+D">Da Yin</a>, <a href="/search/cs?searchtype=author&amp;query=Hu%2C+W">Wenbo Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Peng%2C+N">Nanyun Peng</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+B">Bolei Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Chang%2C+K">Kai-Wei Chang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2411.18651v1-abstract-full"> Humans recognize objects after observing only a few examples, a remarkable capability enabled by their inherent language understanding of the real-world environment. Developing verbalized and interpretable representations can significantly improve model generalization in low-data settings. In this work, we propose Verbalized Representation Learning (VRL), a novel approach for automatically extracting human-interpretable features for object recognition using few-shot data. Our method uniquely captures inter-class differences and intra-class commonalities in the form of natural language by employing a Vision-Language Model (VLM) to identify key discriminative features between different classes and shared characteristics within the same class. These verbalized features are then mapped to numeric vectors through the VLM. The resulting feature vectors can be further utilized to train and infer with downstream classifiers. Experimental results show that, at the same model scale, VRL achieves a 24% absolute improvement over prior state-of-the-art methods while using 95% less data and a smaller model. Furthermore, compared to human-labeled attributes, the features learned by VRL exhibit a 20% absolute gain when used for downstream classification tasks. Code is available at: https://github.com/joeyy5588/VRL/tree/main. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p>
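<p class="is-size-7">The VRL pipeline in the abstract, verbalized features scored by a VLM and fed to a plain classifier, can be sketched as below; vlm_score and the example features are hypothetical stand-ins, not the released code.</p> <pre><code class="language-python">
import numpy as np
from sklearn.linear_model import LogisticRegression

# Hypothetical verbalized features; VRL mines these automatically by
# prompting a VLM to contrast classes in a few-shot support set.
FEATURES = ["has feathers", "has a rigid metal body", "has visible engines"]

def vlm_score(image, feature):
    """Stand-in for a VLM yes/no query such as 'Does the image show
    {feature}?'; replace with a real VLM call returning a score in [0, 1]."""
    raise NotImplementedError

def featurize(images):
    # One interpretable dimension per verbalized feature.
    return np.array([[vlm_score(im, f) for f in FEATURES] for im in images])

# Downstream use: clf = LogisticRegression().fit(featurize(X_train), y_train)
</code></pre>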
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.01798">arXiv:2411.01798</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.01798">pdf</a>, <a href="https://arxiv.org/format/2411.01798">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> SALSA: Soup-based Alignment Learning for Stronger Adaptation in RLHF </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chegini%2C+A">Atoosa Chegini</a>, <a href="/search/cs?searchtype=author&amp;query=Kazemi%2C+H">Hamid Kazemi</a>, <a href="/search/cs?searchtype=author&amp;query=Mirzadeh%2C+I">Iman Mirzadeh</a>, <a href="/search/cs?searchtype=author&amp;query=Yin%2C+D">Dong Yin</a>, <a href="/search/cs?searchtype=author&amp;query=Horton%2C+M">Maxwell Horton</a>, <a href="/search/cs?searchtype=author&amp;query=Nabi%2C+M">Moin Nabi</a>, <a href="/search/cs?searchtype=author&amp;query=Farajtabar%2C+M">Mehrdad Farajtabar</a>, <a href="/search/cs?searchtype=author&amp;query=Alizadeh%2C+K">Keivan Alizadeh</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2411.01798v1-abstract-full"> In Large Language Model (LLM) development, Reinforcement Learning from Human Feedback (RLHF) is crucial for aligning models with human values and preferences. RLHF traditionally relies on the Kullback-Leibler (KL) divergence between the current policy and a frozen initial policy as a reference, which is added as a penalty in policy optimization algorithms like Proximal Policy Optimization (PPO). While this constraint prevents models from deviating too far from the initial checkpoint, it limits exploration of the reward landscape, reducing the model&#39;s ability to discover higher-quality solutions. As a result, policy optimization is often trapped in a narrow region of the parameter space, leading to suboptimal alignment and performance. This paper presents SALSA (Soup-based Alignment Learning for Stronger Adaptation), a novel approach designed to overcome these limitations by creating a more flexible, better-located reference model through weight-space averaging of two independently trained supervised fine-tuned (SFT) models. This model soup allows for larger deviation in KL divergence and exploration of a promising region of the solution space without sacrificing stability. By leveraging this more robust reference model, SALSA fosters better exploration, achieving higher rewards and improving model robustness, out-of-distribution generalization, and performance. We validate the effectiveness of SALSA through extensive experiments on popular open models (Llama2-7B, Mistral-7B, and Gemma-2B) across various benchmarks (MT-Bench, Arena-Hard, UltraFeedback), where it consistently surpasses PPO by fostering deeper exploration and achieving superior alignment in LLMs. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p>
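<p class="is-size-7">The "model soup" reference that SALSA substitutes for a single frozen checkpoint is plain weight-space averaging; a minimal sketch, assuming two SFT checkpoints with identical architectures (the file paths are hypothetical).</p> <pre><code class="language-python">
import torch

def model_soup(state_a, state_b, alpha=0.5):
    """Uniform weight-space average of two SFT checkpoints; SALSA uses
    such a soup as the RLHF reference model instead of one frozen
    checkpoint. Assumes identical architectures and key sets."""
    return {k: alpha * state_a[k] + (1 - alpha) * state_b[k] for k in state_a}

# Hypothetical usage:
# soup = model_soup(torch.load("sft_seed0.pt"), torch.load("sft_seed1.pt"))
# ref_model.load_state_dict(soup)  # then run PPO with a KL penalty to ref_model
</code></pre>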
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.21728">arXiv:2410.21728</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.21728">pdf</a>, <a href="https://arxiv.org/format/2410.21728">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Let&#39;s Be Self-generated via Step by Step: A Curriculum Learning Approach to Automated Reasoning with Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Luo%2C+K">Kangyang Luo</a>, <a href="/search/cs?searchtype=author&amp;query=Ding%2C+Z">Zichen Ding</a>, <a href="/search/cs?searchtype=author&amp;query=Weng%2C+Z">Zhenmin Weng</a>, <a href="/search/cs?searchtype=author&amp;query=Qiao%2C+L">Lingfeng Qiao</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+M">Meng Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+X">Xiang Li</a>, <a href="/search/cs?searchtype=author&amp;query=Yin%2C+D">Di Yin</a>, <a href="/search/cs?searchtype=author&amp;query=Shu%2C+J">Jinlong Shu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2410.21728v2-abstract-full"> While Chain of Thought (CoT) prompting approaches have significantly consolidated the reasoning capabilities of large language models (LLMs), they still face limitations that require extensive human effort or leave room for performance improvement. Existing endeavors have focused on bridging these gaps; however, these approaches either hinge on external data and cannot completely eliminate manual effort, or they fall short in effectively directing LLMs to generate high-quality exemplary prompts. To address these pitfalls, we propose a novel prompting approach for automatic reasoning named LBS3, inspired by curriculum learning, which better reflects human learning habits. Specifically, LBS3 initially steers LLMs to recall easy-to-hard proxy queries that are pertinent to the target query. Following this, it invokes a progressive strategy that utilizes exemplary prompts stemming from easy-proxy queries to direct LLMs in solving hard-proxy queries, ensuring the high quality of the proxy solutions. Finally, our extensive experiments on various reasoning-intensive tasks with varying open- and closed-source LLMs show that LBS3 achieves strongly competitive performance compared to the SOTA baselines. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p>
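<p class="is-size-7">The easy-to-hard proxy-query curriculum described in this abstract can be pictured as one short loop; this is a paraphrased sketch with a hypothetical llm callable, not the paper's prompt templates.</p> <pre><code class="language-python">
def lbs3_answer(target_query, llm):
    """Curriculum-style prompting: solve self-generated proxy queries from
    easy to hard, feeding earlier solutions forward as exemplars."""
    proxies = llm("List sub-questions related to: " + target_query +
                  "\nOrder them from easiest to hardest, one per line.").splitlines()
    exemplars = []
    for q in proxies:
        demo = "\n\n".join(exemplars)
        solution = llm(demo + "\n\nQ: " + q + "\nA: Let's think step by step.")
        exemplars.append("Q: " + q + "\nA: " + solution)
    # Answer the target query with the self-generated curriculum as context.
    return llm("\n\n".join(exemplars) + "\n\nQ: " + target_query + "\nA:")
</code></pre>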
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.20533">arXiv:2410.20533</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.20533">pdf</a>, <a href="https://arxiv.org/format/2410.20533">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Guiding Through Complexity: What Makes Good Supervision for Hard Math Reasoning Tasks?
</p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=He%2C+X">Xuan He</a>, <a href="/search/cs?searchtype=author&amp;query=Yin%2C+D">Da Yin</a>, <a href="/search/cs?searchtype=author&amp;query=Peng%2C+N">Nanyun Peng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.20533v3-abstract-short" style="display: inline;"> How can &#34;weak teacher models&#34; such as average human annotators or existing AI systems, effectively supervise LLMs to improve performance on hard reasoning tasks, especially those that challenge and requires expertise or daily practice from the teacher models? In this paper, we seek for empirical answers to this question by investigating various data-driven strategies that offer supervision data at&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.20533v3-abstract-full').style.display = 'inline'; document.getElementById('2410.20533v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.20533v3-abstract-full" style="display: none;"> How can &#34;weak teacher models&#34; such as average human annotators or existing AI systems, effectively supervise LLMs to improve performance on hard reasoning tasks, especially those that challenge and requires expertise or daily practice from the teacher models? In this paper, we seek for empirical answers to this question by investigating various data-driven strategies that offer supervision data at different quality levels upon tasks of varying complexity. Two intuitive strategies emerge for teacher models to provide supervision during alignment training: 1) using lower-quality supervision from complete tasks that match the difficulty of the target reasoning tasks, and 2) leveraging higher-quality supervision from easier subtasks that are less challenging. Interestingly, we find that even when the outcome error rate for hard task supervision is high (e.g., 90\%), training on such data can outperform perfectly correct supervision of easier subtasks on multiple hard math benchmarks. We further identify a more critical factor influencing training performance: step-wise error rates, which indicate the severity of errors in solutions. Specifically, training on hard task supervision with the same outcome error rates but disparate step-wise error rates can lead to a 30\% accuracy gap on MATH benchmark. Our results also reveal that supplementing hard task supervision with the corresponding subtask supervision can yield notable performance improvements than simply combining rephrased hard full task supervision, suggesting new avenues for data augmentation. Data and code are released at https://github.com/hexuan21/Weak-to-Strong. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.20533v3-abstract-full').style.display = 'none'; document.getElementById('2410.20533v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NAACL 2025 Main</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.17599">arXiv:2410.17599</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.17599">pdf</a>, <a href="https://arxiv.org/format/2410.17599">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Cross-model Control: Improving Multiple Large Language Models in One-time Training </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wu%2C+J">Jiayi Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+H">Hao Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Cai%2C+H">Hengyi Cai</a>, <a href="/search/cs?searchtype=author&amp;query=Su%2C+L">Lixin Su</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+S">Shuaiqiang Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Yin%2C+D">Dawei Yin</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+X">Xiang Li</a>, <a href="/search/cs?searchtype=author&amp;query=Gao%2C+M">Ming Gao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.17599v1-abstract-short" style="display: inline;"> The number of large language models (LLMs) with varying parameter scales and vocabularies is increasing. While they deliver powerful performance, they also face a set of common optimization needs to meet specific requirements or standards, such as instruction following or avoiding the output of sensitive information from the real world. However, how to reuse the fine-tuning outcomes of one model t&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.17599v1-abstract-full').style.display = 'inline'; document.getElementById('2410.17599v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.17599v1-abstract-full" style="display: none;"> The number of large language models (LLMs) with varying parameter scales and vocabularies is increasing. While they deliver powerful performance, they also face a set of common optimization needs to meet specific requirements or standards, such as instruction following or avoiding the output of sensitive information from the real world. However, how to reuse the fine-tuning outcomes of one model to other models to reduce training costs remains a challenge. To bridge this gap, we introduce Cross-model Control (CMC), a method that improves multiple LLMs in one-time training with a portable tiny language model. Specifically, we have observed that the logit shift before and after fine-tuning is remarkably similar across different models. Based on this insight, we incorporate a tiny language model with a minimal number of parameters. By training alongside a frozen template LLM, the tiny model gains the capability to alter the logits output by the LLMs. To make this tiny language model applicable to models with different vocabularies, we propose a novel token mapping strategy named PM-MinED. 
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.13181">arXiv:2410.13181</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.13181">pdf</a>, <a href="https://arxiv.org/format/2410.13181">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> AdaSwitch: Adaptive Switching between Small and Large Agents for Effective Cloud-Local Collaborative Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Sun%2C+H">Hao Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+J">Jiayi Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Cai%2C+H">Hengyi Cai</a>, <a href="/search/cs?searchtype=author&amp;query=Wei%2C+X">Xiaochi Wei</a>, <a href="/search/cs?searchtype=author&amp;query=Feng%2C+Y">Yue Feng</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+B">Bo Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+S">Shuaiqiang Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Y">Yan Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Yin%2C+D">Dawei Yin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2410.13181v1-abstract-full"> Recent advancements in large language models (LLMs) have been remarkable. Users face a choice between using cloud-based LLMs for generation quality and deploying locally based LLMs for lower computational cost. The former option is typically costly and inefficient, while the latter usually fails to deliver satisfactory performance for reasoning steps requiring deliberate thought processes. In this work, we propose a novel LLM utilization paradigm that facilitates the collaborative operation of large cloud-based LLMs and smaller locally deployed LLMs. Our framework comprises two primary modules: a local agent, instantiated with a relatively small LLM, that handles less complex reasoning steps, and a cloud agent, equipped with a larger LLM, that manages more intricate reasoning steps. This collaborative processing is enabled through an adaptive mechanism in which the local agent introspectively identifies errors and proactively seeks assistance from the cloud agent, thereby effectively integrating the strengths of both locally deployed and cloud-based LLMs and resulting in significant enhancements in task completion performance and efficiency. We evaluate AdaSwitch across 7 benchmarks, ranging from mathematical reasoning to complex question answering, using various types of LLMs to instantiate the local and cloud agents. The empirical results show that AdaSwitch effectively improves the performance of the local agent and sometimes achieves competitive results compared to the cloud agent while incurring much lower computational overhead. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">EMNLP 2024 Main Conference</span> </p>
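<p class="is-size-7">The adaptive local-to-cloud escalation can be sketched as follows; local_llm and cloud_llm are hypothetical text-in/text-out callables, and the introspective check is simplified to a yes/no self-query rather than the paper's mechanism.</p> <pre><code class="language-python">
def adaswitch_solve(question, local_llm, cloud_llm, max_steps=10):
    """Local agent drafts each step and escalates to the cloud agent only
    when its own check flags a problem."""
    steps = []
    for _ in range(max_steps):
        context = question + "\n" + "\n".join(steps)
        draft = local_llm("Write the next reasoning step:\n" + context)
        verdict = local_llm("Is this step correct? Answer yes or no.\n" + draft)
        if verdict.strip().lower().startswith("no"):
            draft = cloud_llm("Write the next reasoning step:\n" + context)
        steps.append(draft)
        if "final answer" in draft.lower():
            break
    return steps
</code></pre>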
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.12855">arXiv:2410.12855</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.12855">pdf</a>, <a href="https://arxiv.org/format/2410.12855">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> JAILJUDGE: A Comprehensive Jailbreak Judge Benchmark with Multi-Agent Enhanced Explanation Evaluation Framework </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Liu%2C+F">Fan Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Feng%2C+Y">Yue Feng</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+Z">Zhao Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Su%2C+L">Lixin Su</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+X">Xinyu Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Yin%2C+D">Dawei Yin</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+H">Hao Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2410.12855v2-abstract-full"> Despite advancements in enhancing LLM safety against jailbreak attacks, evaluating LLM defenses remains a challenge, with current methods often lacking explainability and generalization to complex scenarios, leading to incomplete
assessments (e.g., direct judgment without reasoning, a low F1 score for GPT-4 in complex cases, and bias in multilingual scenarios). To address this, we present JAILJUDGE, a comprehensive benchmark featuring diverse risk scenarios, including synthetic, adversarial, in-the-wild, and multilingual prompts, along with high-quality human-annotated datasets. The JAILJUDGE dataset includes over 35k instruction-tuning instances with reasoning explainability, JAILJUDGETEST, a 4.5k+ labeled set of risk scenarios, and a 6k+ multilingual set across ten languages. To enhance evaluation with explicit reasoning, we propose the JailJudge MultiAgent framework, which enables explainable, fine-grained scoring (1 to 10). This framework supports the construction of instruction-tuning ground truth and facilitates the development of JAILJUDGE Guard, an end-to-end judge model that provides reasoning and eliminates API costs. Additionally, we introduce JailBoost, an attacker-agnostic attack enhancer, and GuardShield, a moderation defense, both leveraging JAILJUDGE Guard. Our experiments demonstrate the state-of-the-art performance of JailJudge methods (JailJudge MultiAgent, JAILJUDGE Guard) across diverse models (e.g., GPT-4, Llama-Guard) and zero-shot scenarios. JailBoost and GuardShield significantly improve jailbreak attack and defense tasks under zero-shot settings, with JailBoost enhancing performance by 29.24% and GuardShield reducing the defense ASR from 40.46% to 0.15%. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p>
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.10798">arXiv:2410.10798</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.10798">pdf</a>, <a href="https://arxiv.org/format/2410.10798">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> MMAR: Towards Lossless Multi-Modal Auto-Regressive Probabilistic Modeling </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jian Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Yin%2C+D">Dacheng Yin</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+Y">Yizhou Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Rao%2C+F">Fengyun Rao</a>, <a href="/search/cs?searchtype=author&amp;query=Zhai%2C+W">Wei Zhai</a>, <a href="/search/cs?searchtype=author&amp;query=Cao%2C+Y">Yang Cao</a>, <a href="/search/cs?searchtype=author&amp;query=Zha%2C+Z">Zheng-Jun Zha</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2410.10798v2-abstract-full"> Recent advancements in multi-modal large language models have propelled the development of joint probabilistic models capable of both image understanding and generation. However, we have identified that recent methods inevitably suffer from loss of image information during understanding tasks, due to either image discretization or diffusion denoising steps. To address this issue, we propose a novel Multi-Modal Auto-Regressive (MMAR) probabilistic modeling framework. Unlike the discretization line of methods, MMAR takes in continuous-valued image tokens to avoid information loss. Differing from diffusion-based approaches, we disentangle the diffusion process from the auto-regressive backbone model by employing a light-weight diffusion head on top of each auto-regressed image patch embedding. In this way, when the model transitions from image generation to understanding through text generation, the backbone model&#39;s hidden representation of the image is not limited to the last denoising step. To successfully train our method, we also propose a theoretically proven technique that addresses the numerical stability issue, and a training strategy that balances the generation and understanding task goals. Through extensive evaluations on 18 image understanding benchmarks, MMAR demonstrates far superior performance to other joint multi-modal models, matching the method that employs a pretrained CLIP vision encoder, while being able to generate high-quality images at the same time. We also show that our method is scalable with larger data and model sizes. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p>
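<p class="is-size-7">A light-weight diffusion head over continuous image tokens, as described in this abstract, might look roughly like the module below; the sizes and layer choices are illustrative guesses, not the paper's configuration.</p> <pre><code class="language-python">
import torch
import torch.nn as nn

class DiffusionHead(nn.Module):
    """Per-token noise predictor conditioned on the AR backbone's hidden
    state and the diffusion timestep."""
    def __init__(self, token_dim=16, hidden_dim=1024, t_dim=64, steps=1000):
        super().__init__()
        self.t_embed = nn.Embedding(steps, t_dim)  # discrete timesteps
        self.mlp = nn.Sequential(
            nn.Linear(token_dim + hidden_dim + t_dim, 512),
            nn.SiLU(),
            nn.Linear(512, token_dim),  # predicted noise, token-shaped
        )

    def forward(self, noisy_token, backbone_state, t):
        x = torch.cat([noisy_token, backbone_state, self.t_embed(t)], dim=-1)
        return self.mlp(x)
</code></pre>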
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.10127">arXiv:2410.10127</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.10127">pdf</a>, <a href="https://arxiv.org/format/2410.10127">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> MAIR: A Massive Benchmark for Evaluating Instructed Retrieval </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Sun%2C+W">Weiwei Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+Z">Zhengliang Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+J">Jiulong Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Yan%2C+L">Lingyong Yan</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+X">Xinyu Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Y">Yiding Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Cao%2C+M">Min Cao</a>, <a href="/search/cs?searchtype=author&amp;query=Yin%2C+D">Dawei Yin</a>, <a href="/search/cs?searchtype=author&amp;query=Ren%2C+Z">Zhaochun Ren</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2410.10127v1-abstract-full"> Recent information retrieval (IR) models are pre-trained and instruction-tuned on massive datasets and tasks, enabling them to perform well on a wide range of tasks and potentially generalize to unseen tasks with instructions. However, existing IR benchmarks focus on a limited scope of tasks, making them insufficient for evaluating the latest IR models. In this paper, we propose MAIR (Massive Instructed Retrieval Benchmark), a heterogeneous IR benchmark that includes 126 distinct IR tasks across 6 domains, collected from existing datasets. We benchmark state-of-the-art instruction-tuned text embedding models and re-ranking models. Our experiments reveal that instruction-tuned models generally achieve superior performance compared to non-instruction-tuned models on MAIR. Additionally, our results suggest that current instruction-tuned text embedding models and re-ranking models still lack effectiveness on specific long-tail tasks. MAIR is publicly available at https://github.com/sunnweiwei/Mair. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">EMNLP 2024</span> </p>
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.08197">arXiv:2410.08197</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.08197">pdf</a>, <a href="https://arxiv.org/format/2410.08197">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> From Exploration to Mastery: Enabling LLMs to Master Tools via Self-Driven Interactions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Qu%2C+C">Changle Qu</a>, <a href="/search/cs?searchtype=author&amp;query=Dai%2C+S">Sunhao Dai</a>, <a href="/search/cs?searchtype=author&amp;query=Wei%2C+X">Xiaochi Wei</a>, <a href="/search/cs?searchtype=author&amp;query=Cai%2C+H">Hengyi Cai</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+S">Shuaiqiang Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Yin%2C+D">Dawei Yin</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+J">Jun Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Wen%2C+J">Ji-Rong Wen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2410.08197v2-abstract-full"> Tool learning enables Large Language Models (LLMs) to interact with external environments by invoking tools, serving as an effective strategy to mitigate the limitations inherent in their pre-training data. In this process, tool documentation plays a crucial role by providing usage instructions for LLMs, thereby facilitating effective tool utilization.
This paper concentrates on the critical challenge of bridging the comprehension gap between LLMs and external tools caused by the inadequacies and inaccuracies of existing human-centric tool documentation. We propose a novel framework, DRAFT, aimed at Dynamically Refining tool documentation through the Analysis of Feedback and Trials emanating from LLMs&#39; interactions with external tools. This methodology pivots on a trial-and-error approach consisting of three distinct learning phases: experience gathering, learning from experience, and documentation rewriting, to iteratively enhance the tool documentation. This process is further optimized by a diversity-promoting exploration strategy that ensures explorative diversity and a tool-adaptive termination mechanism that prevents overfitting while enhancing efficiency. Extensive experiments on multiple datasets demonstrate that DRAFT&#39;s iterative, feedback-based refinement significantly improves documentation quality, fostering a deeper comprehension and more effective utilization of tools by LLMs. Notably, our analysis reveals that tool documentation refined via our approach demonstrates robust cross-model generalization capabilities. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 10 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICLR 2025 Oral; GitHub: https://github.com/quchangle1/DRAFT</span> </p>
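<p class="is-size-7">DRAFT's three learning phases form a loop with an adaptive stopping rule; a hedged sketch in which tool, llm, and evaluate_docs are hypothetical callables supplied by the caller, and the diversity-promoting exploration strategy is omitted.</p> <pre><code class="language-python">
def draft_refine(tool, docs, llm, evaluate_docs, max_rounds=5, min_gain=0.01):
    """Trial-and-error documentation refinement; evaluate_docs might
    measure doc-guided tool-call success rate."""
    score = evaluate_docs(tool, docs)
    for _ in range(max_rounds):
        # 1) Experience gathering: probe the tool with doc-guided calls.
        calls = llm("Propose test calls given these docs:\n" + docs).splitlines()
        trials = [str(tool(c)) for c in calls]
        # 2) Learning from experience: summarize doc/behavior mismatches.
        lessons = llm("Docs:\n" + docs + "\nObserved:\n" + "\n".join(trials) +
                      "\nList gaps between the docs and the behavior.")
        # 3) Documentation rewriting.
        docs = llm("Rewrite the docs to close these gaps:\n" + lessons +
                   "\n\n" + docs)
        new_score = evaluate_docs(tool, docs)
        if new_score - score < min_gain:   # tool-adaptive termination
            break
        score = new_score
    return docs
</code></pre>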
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICLR 2025 Oral;GitHub:https://github.com/quchangle1/DRAFT</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.07672">arXiv:2410.07672</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.07672">pdf</a>, <a href="https://arxiv.org/format/2410.07672">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> MACPO: Weak-to-Strong Alignment via Multi-Agent Contrastive Preference Optimization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lyu%2C+Y">Yougang Lyu</a>, <a href="/search/cs?searchtype=author&amp;query=Yan%2C+L">Lingyong Yan</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Z">Zihan Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Yin%2C+D">Dawei Yin</a>, <a href="/search/cs?searchtype=author&amp;query=Ren%2C+P">Pengjie Ren</a>, <a href="/search/cs?searchtype=author&amp;query=de+Rijke%2C+M">Maarten de Rijke</a>, <a href="/search/cs?searchtype=author&amp;query=Ren%2C+Z">Zhaochun Ren</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.07672v2-abstract-short" style="display: inline;"> As large language models (LLMs) are rapidly advancing and achieving near-human capabilities on specific tasks, aligning them with human values is becoming more urgent. In scenarios where LLMs outperform humans, we face a weak-to-strong alignment problem where we need to effectively align strong student LLMs through weak supervision generated by weak teachers. Existing alignment methods mainly focu&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.07672v2-abstract-full').style.display = 'inline'; document.getElementById('2410.07672v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.07672v2-abstract-full" style="display: none;"> As large language models (LLMs) are rapidly advancing and achieving near-human capabilities on specific tasks, aligning them with human values is becoming more urgent. In scenarios where LLMs outperform humans, we face a weak-to-strong alignment problem where we need to effectively align strong student LLMs through weak supervision generated by weak teachers. Existing alignment methods mainly focus on strong-to-weak alignment and self-alignment settings, and it is impractical to adapt them to the much harder weak-to-strong alignment setting. To fill this gap, we propose a multi-agent contrastive preference optimization (MACPO) framework. MACPO facilitates weak teachers and strong students to learn from each other by iteratively reinforcing unfamiliar positive behaviors while penalizing familiar negative ones. 
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.06886">arXiv:2410.06886</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.06886">pdf</a>, <a href="https://arxiv.org/format/2410.06886">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> FltLM: An Intergrated Long-Context Large Language Model for Effective Context Filtering and Understanding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Deng%2C+J">Jingyang Deng</a>, <a href="/search/cs?searchtype=author&amp;query=Shen%2C+Z">Zhengyang Shen</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+B">Boyang Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Su%2C+L">Lixin Su</a>, <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+S">Suqi Cheng</a>, <a href="/search/cs?searchtype=author&amp;query=Nie%2C+Y">Ying Nie</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+J">Junfeng Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Yin%2C+D">Dawei Yin</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+J">Jinwen Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2410.06886v1-abstract-full"> The development of Long-Context Large Language Models (LLMs) has markedly advanced natural language processing by facilitating the processing of textual data across long documents and multiple corpora.
However, Long-Context LLMs still face two critical challenges: the lost-in-the-middle phenomenon, where crucial middle-context information is likely to be missed, and the distraction issue, where the models lose focus due to overly extended contexts. To address these challenges, we propose the Context Filtering Language Model (FltLM), a novel integrated Long-Context LLM that enhances the model&#39;s ability on multi-document question-answering (QA) tasks. Specifically, FltLM innovatively incorporates a context filter with a soft mask mechanism, identifying and dynamically excluding irrelevant content so that the model concentrates on pertinent information for better comprehension and reasoning. Our approach not only mitigates these two challenges but also enables the model to operate conveniently in a single forward pass. Experimental results demonstrate that FltLM significantly outperforms supervised fine-tuning and retrieval-based methods in complex QA scenarios, suggesting a promising solution for more accurate and reliable long-context natural language understanding applications. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by the 27th European Conference on Artificial Intelligence (ECAI-2024); this is the full version of the paper, including technical appendices. This final version features enhanced formatting and corrections to errors present in other online versions. We regret any inconvenience this may have caused our readers</span> </p>
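<p class="is-size-7">A context filter with a soft mask, as this abstract describes, can be approximated by a sigmoid gate over per-token relevance scores; the module below is an illustrative sketch under that reading, not FltLM's actual architecture.</p> <pre><code class="language-python">
import torch
import torch.nn as nn

class SoftContextFilter(nn.Module):
    """Score each context token's relevance to the query and down-weight
    irrelevant content with a soft mask, inside one forward pass."""
    def __init__(self, dim=768):
        super().__init__()
        self.score = nn.Linear(dim, 1)

    def forward(self, token_states, query_state):
        # token_states: (batch, seq, dim); query_state: (batch, dim)
        logits = self.score(token_states * query_state.unsqueeze(1)).squeeze(-1)
        gate = torch.sigmoid(logits)                    # soft mask in [0, 1]
        return token_states * gate.unsqueeze(-1), gate  # filtered states
</code></pre>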
arXiv:2410.05763 [pdf, other] cs.IR cs.CL
Information Discovery in e-Commerce
Authors: Zhaochun Ren, Xiangnan He, Dawei Yin, Maarten de Rijke
Abstract: Electronic commerce, or e-commerce, is the buying and selling of goods and services, or the transmitting of funds or data, online. E-commerce platforms come in many kinds, with global players such as Amazon, Airbnb, Alibaba and eBay, as well as platforms targeting specific geographic regions. Information retrieval has a natural role to play in e-commerce, especially in connecting people to goods and services. Information discovery in e-commerce concerns different types of search (e.g., exploratory search vs. lookup tasks), recommender systems, and natural language processing in e-commerce portals. The rise in popularity of e-commerce sites has made research on information discovery in e-commerce an increasingly active research area, witnessed by an increase in publications and dedicated workshops in this space. Methods for information discovery in e-commerce largely focus on improving the effectiveness of e-commerce search and recommender systems, on enriching and using knowledge graphs to support e-commerce, and on developing innovative question-answering and bot-based solutions that help connect people to goods and services. In this survey, we give an overview of the fundamental infrastructure, algorithms, and technical solutions for information discovery in e-commerce. The topics covered include user behavior and profiling, search, recommendation, and language technology in e-commerce.
Submitted 25 January, 2025; v1 submitted 8 October, 2024; originally announced October 2024.

<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.05763v3-abstract-full').style.display = 'none'; document.getElementById('2410.05763v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.01920">arXiv:2410.01920</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.01920">pdf</a>, <a href="https://arxiv.org/format/2410.01920">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Step-by-Step Reasoning for Math Problems via Twisted Sequential Monte Carlo </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Feng%2C+S">Shengyu Feng</a>, <a href="/search/cs?searchtype=author&amp;query=Kong%2C+X">Xiang Kong</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+S">Shuang Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+A">Aonan Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Yin%2C+D">Dong Yin</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+C">Chong Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Pang%2C+R">Ruoming Pang</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+Y">Yiming Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.01920v4-abstract-short" style="display: inline;"> Augmenting the multi-step reasoning abilities of Large Language Models (LLMs) has been a persistent challenge. Recently, verification has shown promise in improving solution consistency by evaluating generated outputs. However, current verification approaches suffer from sampling inefficiencies, requiring a large number of samples to achieve satisfactory performance. Additionally, training an effe&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.01920v4-abstract-full').style.display = 'inline'; document.getElementById('2410.01920v4-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.01920v4-abstract-full" style="display: none;"> Augmenting the multi-step reasoning abilities of Large Language Models (LLMs) has been a persistent challenge. Recently, verification has shown promise in improving solution consistency by evaluating generated outputs. However, current verification approaches suffer from sampling inefficiencies, requiring a large number of samples to achieve satisfactory performance. Additionally, training an effective verifier often depends on extensive process supervision, which is costly to acquire. In this paper, we address these limitations by introducing a novel verification method based on Twisted Sequential Monte Carlo (TSMC). 
arXiv:2409.16913 [pdf, other] cs.AI
Tell Me What You Don't Know: Enhancing Refusal Capabilities of Role-Playing Agents via Representation Space Analysis and Editing
Authors: Wenhao Liu, Siyu An, Junru Lu, Muling Wu, Tianlong Li, Xiaohua Wang, Xiaoqing Zheng, Di Yin, Xing Sun, Xuanjing Huang
Abstract: Role-Playing Agents (RPAs) have shown remarkable performance in various applications, yet they often struggle to recognize and appropriately respond to hard queries that conflict with their role-play knowledge. To investigate RPAs' performance when faced with different types of conflicting requests, we develop an evaluation benchmark that includes contextual knowledge conflicting requests, parametric knowledge conflicting requests, and non-conflicting requests, assessing RPAs' ability to identify conflicts and refuse to answer appropriately without over-refusing. Through extensive evaluation, we find that most RPAs exhibit significant performance gaps across different types of conflicting requests. To elucidate the reasons, we conduct an in-depth representation-level analysis of RPAs under various conflict scenarios. Our findings reveal the existence of rejection regions and direct-response regions within the model's forward-pass representations, which influence the RPA's final response behavior. We therefore introduce a lightweight representation-editing approach that shifts conflicting requests into the rejection region, thereby enhancing the model's refusal accuracy. The experimental results validate the effectiveness of our editing method, improving RPAs' ability to refuse conflicting requests while maintaining their general role-playing capabilities.
Submitted 25 September, 2024; originally announced September 2024.

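Shifting a conflicting request into the rejection region can be pictured as adding a scaled "refusal direction" to a hidden state. In the sketch below, the direction estimate (difference of class means) and the scale `alpha` are assumptions; the abstract only says that conflicting requests are moved toward the rejection region.

```python
import torch

def shift_to_rejection(hidden, mu_reject, mu_respond, alpha=1.0):
    # mu_reject / mu_respond: mean hidden states of known refusals
    # vs. direct answers, estimated offline from probe data.
    direction = mu_reject - mu_respond
    direction = direction / direction.norm()
    return hidden + alpha * direction

h = torch.randn(4096)                 # hidden state of a conflicting request
mu_r, mu_d = torch.randn(4096), torch.randn(4096)
h_edited = shift_to_rejection(h, mu_r, mu_d, alpha=2.0)
```
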
arXiv:2409.16594 [pdf, other] cs.IR cs.LG
Generative Pre-trained Ranking Model with Over-parameterization at Web-Scale (Extended Abstract)
Authors: Yuchen Li, Haoyi Xiong, Linghe Kong, Jiang Bian, Shuaiqiang Wang, Guihai Chen, Dawei Yin
Abstract: Learning to rank (LTR) is widely employed in web searches to prioritize pertinent webpages from retrieved content based on input queries. However, traditional LTR models encounter two principal obstacles that lead to suboptimal performance: (1) the lack of well-annotated query-webpage pairs with ranking scores covering a diverse range of search query popularities, which hampers their ability to address queries across the popularity spectrum, and (2) inadequately trained models that fail to induce generalized representations for LTR, resulting in overfitting. To address these challenges, we propose a Generative Semi-Supervised Pre-trained (GS2P) LTR model. We conduct extensive offline experiments on both a publicly available dataset and a real-world dataset collected from a large-scale search engine. Furthermore, we deploy GS2P in a large-scale web search engine with realistic traffic, where we observe significant improvements in the real-world application.
Submitted 24 September, 2024; originally announced September 2024.

<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.16594v1-abstract-full').style.display = 'none'; document.getElementById('2409.16594v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.16590">arXiv:2409.16590</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.16590">pdf</a>, <a href="https://arxiv.org/format/2409.16590">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Pre-trained Graphformer-based Ranking at Web-scale Search (Extended Abstract) </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+Y">Yuchen Li</a>, <a href="/search/cs?searchtype=author&amp;query=Xiong%2C+H">Haoyi Xiong</a>, <a href="/search/cs?searchtype=author&amp;query=Kong%2C+L">Linghe Kong</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+Z">Zeyi Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+H">Hongyang Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+S">Shuaiqiang Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Yin%2C+D">Dawei Yin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.16590v1-abstract-short" style="display: inline;"> Both Transformer and Graph Neural Networks (GNNs) have been employed in the domain of learning to rank (LTR). However, these approaches adhere to two distinct yet complementary problem formulations: ranking score regression based on query-webpage pairs, and link prediction within query-webpage bipartite graphs, respectively. While it is possible to pre-train GNNs or Transformers on source datasets&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.16590v1-abstract-full').style.display = 'inline'; document.getElementById('2409.16590v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.16590v1-abstract-full" style="display: none;"> Both Transformer and Graph Neural Networks (GNNs) have been employed in the domain of learning to rank (LTR). However, these approaches adhere to two distinct yet complementary problem formulations: ranking score regression based on query-webpage pairs, and link prediction within query-webpage bipartite graphs, respectively. While it is possible to pre-train GNNs or Transformers on source datasets and subsequently fine-tune them on sparsely annotated LTR datasets, the distributional shifts between the pair-based and bipartite graph domains present significant challenges in integrating these heterogeneous models into a unified LTR framework at web scale. 
arXiv:2409.16202 [pdf, other] cs.AI
CJEval: A Benchmark for Assessing Large Language Models Using Chinese Junior High School Exam Data
Authors: Qian-Wen Zhang, Haochen Wang, Fang Li, Siyu An, Lingfeng Qiao, Liangcai Gao, Di Yin, Xing Sun
Abstract: Online education platforms have significantly transformed the dissemination of educational resources by providing a dynamic and digital infrastructure. With the further enhancement of this transformation, the advent of Large Language Models (LLMs) has elevated the intelligence levels of these platforms. However, current academic benchmarks provide limited guidance for real-world industry scenarios, because educational applications require more than mere test-question responses. To bridge this gap, we introduce CJEval, a benchmark based on Chinese Junior High School Exam Evaluations. CJEval consists of 26,136 samples across four application-level educational tasks covering ten subjects. These samples include not only questions and answers but also detailed annotations such as question types, difficulty levels, knowledge concepts, and answer explanations. Using this benchmark, we assessed LLMs' potential applications and conducted a comprehensive analysis of their performance by fine-tuning on various educational tasks. Extensive experiments and discussions highlight the opportunities and challenges of applying LLMs in the field of education.
Submitted 24 September, 2024; v1 submitted 24 September, 2024; originally announced September 2024.

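To make the annotation scheme concrete, a CJEval-style record might look like the following; the field names merely mirror the annotation types the abstract lists, and the values are placeholders rather than real dataset content.

```python
# One hypothetical CJEval-style sample (field names are assumptions
# based on the annotations described in the abstract).
sample = {
    "subject": "mathematics",            # one of ten subjects
    "task": "question_answering",        # one of four application-level tasks
    "question": "...",
    "answer": "...",
    "question_type": "multiple_choice",
    "difficulty_level": "medium",
    "knowledge_concepts": ["linear equations in one variable"],
    "answer_explanation": "...",
}
```
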
arXiv:2409.11901 [pdf, other] cs.CL
LLMs + Persona-Plug = Personalized LLMs
Authors: Jiongnan Liu, Yutao Zhu, Shuting Wang, Xiaochi Wei, Erxue Min, Yu Lu, Shuaiqiang Wang, Dawei Yin, Zhicheng Dou
Abstract: Personalization plays a critical role in numerous language tasks and applications, since users with the same requirements may prefer diverse outputs based on their individual interests. This has led to the development of various personalized approaches aimed at adapting large language models (LLMs) to generate customized outputs aligned with user preferences. Some of them involve fine-tuning a unique personalized LLM for each user, which is too expensive for widespread application. Alternative approaches introduce personalization information in a plug-and-play manner by retrieving the user's relevant historical texts as demonstrations. However, this retrieval-based strategy may break the continuity of the user history and fail to capture the user's overall styles and patterns, leading to sub-optimal performance. To address these challenges, we propose a novel personalized LLM model, Persona-Plug. It constructs a user-specific embedding for each individual by modeling all of their historical contexts through a lightweight plug-in user-embedder module. By attaching this embedding to the task input, LLMs can better understand and capture user habits and preferences, thereby producing more personalized outputs without tuning their own parameters. Extensive experiments on various tasks in the language model personalization (LaMP) benchmark demonstrate that the proposed model significantly outperforms existing personalized LLM approaches.
Submitted 18 September, 2024; originally announced September 2024.

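A minimal sketch of such a plug-in user embedder: attention-pool the embeddings of a user's complete history into one vector, which is then attached to the task input while the LLM itself stays frozen. The pooling choice and dimensions are assumptions rather than the paper's exact design.

```python
import torch
import torch.nn as nn

class UserEmbedder(nn.Module):
    """Lightweight plug-in that condenses a user's entire history of
    document embeddings into a single user embedding (a sketch; the
    pooling and sizes are assumed, not taken from the paper)."""
    def __init__(self, dim):
        super().__init__()
        self.score = nn.Linear(dim, 1)

    def forward(self, history):               # (num_items, dim)
        w = torch.softmax(self.score(history), dim=0)
        return (w * history).sum(dim=0)       # (dim,)

# The user vector is attached to the task input's embeddings while
# the LLM's own parameters remain frozen.
user_vec = UserEmbedder(dim=768)(torch.randn(20, 768))
```
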
arXiv:2409.10909 [pdf, other] cs.IR cs.AI cs.CL
GenCRF: Generative Clustering and Reformulation Framework for Enhanced Intent-Driven Information Retrieval
Authors: Wonduk Seo, Haojie Zhang, Yueyang Zhang, Changhao Zhang, Songyao Duan, Lixin Su, Daiting Shi, Jiashu Zhao, Dawei Yin
Abstract: Query reformulation is a well-known problem in Information Retrieval (IR), aimed at improving the rate at which a single search completes successfully by automatically modifying the user's input query. Recent methods leverage Large Language Models (LLMs) to improve query reformulation, but often generate limited and redundant expansions, potentially constraining their effectiveness in capturing diverse intents. In this paper, we propose GenCRF, a Generative Clustering and Reformulation Framework that, for the first time, adaptively captures diverse intents from multiple differentiated, well-generated queries at the retrieval phase. GenCRF leverages LLMs to generate varied queries from the initial query using customized prompts, then clusters them into groups to distinctly represent diverse intents. Furthermore, the framework combines the diverse-intent queries through innovative weighted aggregation strategies to optimize retrieval performance, and crucially integrates a novel Query Evaluation Rewarding Model (QERM) to refine the process through feedback loops. Empirical experiments on the BEIR benchmark demonstrate that GenCRF achieves state-of-the-art performance, surpassing previous query-reformulation SOTAs by up to 12% on nDCG@10. These techniques can be adapted to various LLMs, significantly boosting retriever performance and advancing the field of Information Retrieval.
Submitted 17 September, 2024; originally announced September 2024.

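The generate-cluster-aggregate pipeline can be sketched as follows; the clustering algorithm, the weighting scheme, and all sizes are assumptions, and the QERM feedback loop is omitted.

```python
import numpy as np
from sklearn.cluster import KMeans

def aggregate(doc_scores, cluster_ids, cluster_weights):
    # doc_scores: (num_queries, num_docs) retrieval scores, one row per
    # generated query. Each intent cluster contributes the mean of its
    # members' scores, scaled by its weight. (GenCRF's exact weighting
    # is not reproduced here.)
    agg = np.zeros(doc_scores.shape[1])
    for c, w in cluster_weights.items():
        agg += w * doc_scores[cluster_ids == c].mean(axis=0)
    return agg

query_embs = np.random.rand(6, 32)        # embeddings of 6 LLM-generated queries
cluster_ids = KMeans(n_clusters=2, n_init=10).fit_predict(query_embs)
doc_scores = np.random.rand(6, 100)       # scores against 100 documents
print(aggregate(doc_scores, cluster_ids, {0: 0.6, 1: 0.4})[:5])
```
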
arXiv:2409.05207 [pdf, other] cs.LG
Low Latency Transformer Inference on FPGAs for Physics Applications with hls4ml
Authors: Zhixing Jiang, Dennis Yin, Yihui Chen, Elham E Khoda, Scott Hauck, Shih-Chieh Hsu, Ekaterina Govorkova, Philip Harris, Vladimir Loncar, Eric A. Moreno
Abstract: This study presents an efficient implementation of transformer architectures on Field-Programmable Gate Arrays (FPGAs) using hls4ml. We demonstrate the strategy for implementing the multi-head attention, softmax, and normalization layers, and evaluate three distinct models. Their deployment on a VU13P FPGA chip achieved a latency of less than 2 µs, demonstrating the potential for real-time applications. hls4ml's compatibility with any TensorFlow-built transformer model further enhances the scalability and applicability of this work. Index Terms: FPGAs, machine learning, transformers, high energy physics, LIGO
Submitted 8 September, 2024; originally announced September 2024.

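For orientation, the standard hls4ml flow turns a trained Keras model into an HLS project roughly as below. The model here is a toy stand-in (not one of the paper's transformers), and the VU13P part string is an assumption to be checked against the actual board.

```python
import hls4ml
from tensorflow import keras

# Toy stand-in model; hls4ml converts TensorFlow-built models
# into an HLS project for FPGA synthesis.
model = keras.Sequential([keras.Input(shape=(16,)),
                          keras.layers.Dense(8, activation="softmax")])

config = hls4ml.utils.config_from_keras_model(model, granularity="model")
hls_model = hls4ml.converters.convert_from_keras_model(
    model,
    hls_config=config,
    output_dir="hls_prj",
    part="xcvu13p-flga2577-2-e",  # a VU13P part string (assumed; verify yours)
)
hls_model.compile()  # builds the C simulation for quick functional checks
```
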
arXiv:2408.16500 [pdf, other] cs.CV
CogVLM2: Visual Language Models for Image and Video Understanding
Authors: Wenyi Hong, Weihan Wang, Ming Ding, Wenmeng Yu, Qingsong Lv, Yan Wang, Yean Cheng, Shiyu Huang, Junhui Ji, Zhao Xue, Lei Zhao, Zhuoyi Yang, Xiaotao Gu, Xiaohan Zhang, Guanyu Feng, Da Yin, Zihan Wang, Ji Qi, Xixuan Song, Peng Zhang, Debing Liu, Bin Xu, Juanzi Li, Yuxiao Dong, Jie Tang
Abstract: Beginning with VisualGLM and CogVLM, we have been continuously exploring VLMs in pursuit of enhanced vision-language fusion, efficient higher-resolution architectures, and broader modalities and applications. Here we propose the CogVLM2 family, a new generation of visual language models for image and video understanding, including CogVLM2, CogVLM2-Video and GLM-4V. As an image understanding model, CogVLM2 inherits the visual expert architecture with improved training recipes in both pre-training and post-training stages, supporting input resolutions up to 1344 × 1344 pixels. As a video understanding model, CogVLM2-Video integrates multi-frame input with timestamps and proposes automated temporal grounding data construction. Notably, the CogVLM2 family has achieved state-of-the-art results on benchmarks such as MMBench, MM-Vet, TextVQA, MVBench and VCGBench. All models are open-sourced at https://github.com/THUDM/CogVLM2 and https://github.com/THUDM/GLM-4, contributing to the advancement of the field.
Submitted 29 August, 2024; originally announced August 2024.

arXiv:2408.10269 [pdf, other] cs.LG cs.AI cs.CY
OpenCity: Open Spatio-Temporal Foundation Models for Traffic Prediction
Authors: Zhonghang Li, Long Xia, Lei Shi, Yong Xu, Dawei Yin, Chao Huang
Abstract: Accurate traffic forecasting is crucial for effective urban planning and transportation management, enabling efficient resource allocation and enhanced travel experiences. However, existing models often face limitations in generalization, struggling with zero-shot prediction on unseen regions and cities, as well as diminished long-term accuracy. This is primarily due to the inherent challenges of handling the spatial and temporal heterogeneity of traffic data, coupled with the significant distribution shift across time and space. In this work, we aim to unlock new possibilities for building versatile, resilient and adaptive spatio-temporal foundation models for traffic prediction. To achieve this goal, we introduce a novel foundation model, named OpenCity, that can effectively capture and normalize the underlying spatio-temporal patterns from diverse data characteristics, facilitating zero-shot generalization across diverse urban environments. OpenCity integrates the Transformer architecture with graph neural networks to model the complex spatio-temporal dependencies in traffic data. By pre-training OpenCity on large-scale, heterogeneous traffic datasets, we enable the model to learn rich, generalizable representations that can be seamlessly applied to a wide range of traffic forecasting scenarios. Experimental results demonstrate that OpenCity exhibits exceptional zero-shot predictive performance. Moreover, OpenCity showcases promising scaling laws, suggesting the potential for developing a truly one-for-all traffic prediction solution that can adapt to new urban contexts with minimal overhead. The proposed OpenCity model is open-source and available at https://github.com/HKUDS/OpenCity.
Submitted 16 August, 2024; originally announced August 2024.
Comments: 12 pages

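One way to picture the described Transformer-plus-GNN combination is a block that attends along the time axis and passes messages along the road-network graph; the wiring and dimensions below are assumptions for illustration only, not OpenCity's actual architecture.

```python
import torch
import torch.nn as nn

class SpatioTemporalBlock(nn.Module):
    """Illustrative Transformer + graph message-passing block."""
    def __init__(self, dim, heads=4):
        super().__init__()
        self.temporal = nn.MultiheadAttention(dim, heads, batch_first=True)
        self.spatial = nn.Linear(dim, dim)  # stands in for a GNN layer

    def forward(self, x, adj):               # x: (nodes, time, dim)
        t, _ = self.temporal(x, x, x)         # attention along the time axis
        x = x + t
        # Simple message passing: average neighbor features per timestep.
        deg = adj.sum(1, keepdim=True).clamp(min=1.0).unsqueeze(-1)
        msg = torch.einsum("ij,jtd->itd", adj, x) / deg
        return x + self.spatial(msg)

blk = SpatioTemporalBlock(dim=32)
out = blk(torch.randn(10, 12, 32), torch.eye(10))  # 10 sensors, 12 timesteps
```
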
arXiv:2408.08345 [pdf, other] cs.CV
5% > 100%: Breaking Performance Shackles of Full Fine-Tuning on Visual Recognition Tasks
Authors: Dongshuo Yin, Leiyi Hu, Bin Li, Youqun Zhang, Xue Yang
Abstract: Pre-training and fine-tuning can enhance transfer efficiency and performance on visual tasks, and recent delta-tuning methods provide more options for visual classification tasks. Despite their success, existing visual delta-tuning methods fail to exceed the upper limit of full fine-tuning on challenging tasks such as object detection and segmentation. To find a competitive alternative to full fine-tuning, we propose Multi-cognitive Visual Adapter (Mona) tuning, a novel adapter-based tuning method. First, we introduce multiple vision-friendly filters into the adapter to enhance its ability to process visual signals, whereas previous methods mainly rely on language-friendly linear filters. Second, we add a scaled normalization layer to the adapter to regulate the distribution of input features for the visual filters. To fully demonstrate the practicality and generality of Mona, we conduct experiments on multiple representative visual tasks, including instance segmentation on COCO, semantic segmentation on ADE20K, object detection on Pascal VOC, oriented object detection on DOTA/STAR, and image classification on three common datasets. The results show that Mona surpasses full fine-tuning on all of these tasks and is the only delta-tuning method to outperform full fine-tuning across them. For example, Mona achieves a 1% performance gain on the COCO dataset compared to full fine-tuning. Comprehensive results suggest that Mona-tuning is more suitable for retaining and utilizing the capabilities of pre-trained models than full fine-tuning. The code will be released at https://github.com/Leiyi-Hu/mona.
Submitted 27 August, 2024; v1 submitted 15 August, 2024; originally announced August 2024.
Comments: arXiv admin note: substantial text overlap with arXiv:2311.15010

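Based only on the abstract's description, a Mona-like adapter could combine several depthwise convolutional (vision-friendly) filters with a scaled normalization on its input; kernel sizes, the fusion rule, and the bottleneck width below are assumptions, not the released design.

```python
import torch
import torch.nn as nn

class MonaLikeAdapter(nn.Module):
    """Sketch of an adapter with multiple conv filters and a scaled
    input normalization, per the abstract (details assumed)."""
    def __init__(self, dim, bottleneck=64):
        super().__init__()
        self.norm = nn.LayerNorm(dim)
        self.scale = nn.Parameter(torch.ones(1))     # scaled normalization
        self.down = nn.Linear(dim, bottleneck)
        self.convs = nn.ModuleList([                 # vision-friendly filters
            nn.Conv2d(bottleneck, bottleneck, k, padding=k // 2, groups=bottleneck)
            for k in (3, 5, 7)])
        self.up = nn.Linear(bottleneck, dim)

    def forward(self, x):                             # x: (B, H, W, C)
        z = self.down(self.scale * self.norm(x))
        z = z.permute(0, 3, 1, 2)                     # (B, C, H, W) for convs
        z = sum(conv(z) for conv in self.convs) / 3   # fuse the filter outputs
        z = z.permute(0, 2, 3, 1)
        return x + self.up(z)                         # residual adapter

adapter = MonaLikeAdapter(dim=96)
y = adapter(torch.randn(2, 14, 14, 96))
```
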
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">arXiv admin note: substantial text overlap with arXiv:2311.15010</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Yin%2C+D&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Yin%2C+D&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Yin%2C+D&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Yin%2C+D&amp;start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Yin%2C+D&amp;start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Yin%2C+D&amp;start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Yin%2C+D&amp;start=250" class="pagination-link " aria-label="Page 6" aria-current="page">6 </a> </li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> 
<li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

Pages: 1 2 3 4 5 6 7 8 9 10