Search | arXiv e-print repository
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 928 results for author: <span class="mathjax">Sun, M</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Sun%2C+M">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Sun, M"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Sun%2C+M&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Sun, M"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Sun%2C+M&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Sun%2C+M&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Sun%2C+M&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Sun%2C+M&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Sun%2C+M&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Sun%2C+M&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">…</span></li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.18407">arXiv:2502.18407</a> <span> [<a href="https://arxiv.org/pdf/2502.18407">pdf</a>, <a href="https://arxiv.org/format/2502.18407">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> AgentRM: Enhancing Agent Generalization with Reward Modeling </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xia%2C+Y">Yu Xia</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+J">Jingru Fan</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+W">Weize Chen</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+S">Siyu Yan</a>, <a href="/search/cs?searchtype=author&query=Cong%2C+X">Xin Cong</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zhong Zhang</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+Y">Yaxi Lu</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+Y">Yankai Lin</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zhiyuan Liu</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+M">Maosong Sun</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.18407v1-abstract-short" style="display: inline;"> Existing LLM-based agents have achieved strong performance on held-in tasks, but 
Abstract: Existing LLM-based agents have achieved strong performance on held-in tasks, but their generalizability to unseen tasks remains poor. Hence, some recent work focuses on fine-tuning the policy model with more diverse tasks to improve generalizability. In this work, we find that fine-tuning a reward model to guide the policy model is more robust than directly fine-tuning the policy model. Based on this finding, we propose AgentRM, a generalizable reward model, to guide the policy model for effective test-time search. We comprehensively investigate three approaches to constructing the reward model: explicit reward modeling, implicit reward modeling, and LLM-as-a-judge. We then use AgentRM to guide answer generation with Best-of-N sampling and step-level beam search. On nine agent tasks of four types, AgentRM enhances the base policy model by 8.8 points on average, surpassing the top general agent by 4.0. Moreover, it demonstrates weak-to-strong generalization, yielding a greater improvement of 12.6 on the LLaMA-3-70B policy model. As for specializability, AgentRM can also boost a fine-tuned policy model, outperforming the top specialized agent by 11.4 on three held-in tasks. Further analysis verifies its effectiveness in test-time scaling. Code will be released to facilitate research in this area.
Submitted 25 February, 2025; originally announced February 2025.
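
As a concrete picture of the test-time search described above, here is a minimal sketch of reward-guided Best-of-N selection; the `policy` and `reward_model` objects and their methods are hypothetical stand-ins, not the paper's interfaces.

```python
# Minimal sketch of reward-model-guided Best-of-N sampling, the test-time
# search strategy named in the abstract. `policy` and `reward_model` are
# hypothetical stand-ins for the paper's actual components.
def best_of_n(policy, reward_model, task_prompt, n=8):
    """Sample n candidate trajectories; keep the one the reward model prefers."""
    candidates = [policy.generate(task_prompt) for _ in range(n)]
    scores = [reward_model.score(task_prompt, c) for c in candidates]
    return max(zip(scores, candidates), key=lambda sc: sc[0])[1]
```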

2. arXiv:2502.18008 [pdf, other] (cs.SD, cs.AI, eess.AS)
NotaGen: Advancing Musicality in Symbolic Music Generation with Large Language Model Training Paradigms
Authors: Yashan Wang, Shangda Wu, Jianhuai Hu, Xingjian Du, Yueqi Peng, Yongxin Huang, Shuai Fan, Xiaobing Li, Feng Yu, Maosong Sun
Abstract: We introduce NotaGen, a symbolic music generation model aiming to explore the potential of producing high-quality classical sheet music. Inspired by the success of Large Language Models (LLMs), NotaGen adopts pre-training, fine-tuning, and reinforcement learning paradigms (henceforth referred to as the LLM training paradigms). It is pre-trained on 1.6M pieces of music and then fine-tuned on approximately 9K high-quality classical compositions conditioned on "period-composer-instrumentation" prompts. For reinforcement learning, we propose the CLaMP-DPO method, which further enhances generation quality and controllability without requiring human annotations or predefined rewards. Our experiments demonstrate the efficacy of CLaMP-DPO in symbolic music generation models with different architectures and encoding schemes. Furthermore, subjective A/B tests show that NotaGen outperforms baseline models against human compositions, greatly advancing musical aesthetics in symbolic music generation. The project homepage is https://electricalexis.github.io/notagen-demo.
Submitted 25 February, 2025; originally announced February 2025.
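
The abstract does not spell out CLaMP-DPO; it presumably builds on the standard Direct Preference Optimization objective, sketched below with preference pairs assumed to be scored by the CLaMP model rather than human annotators (that scoring step is not shown).

```python
import torch.nn.functional as F

# Sketch of the standard DPO preference loss that CLaMP-DPO appears to build
# on. Inputs are per-sequence log-probabilities (summed over tokens), shape
# (batch,), under the trained policy and a frozen reference model.
def dpo_loss(logp_chosen, logp_rejected, ref_logp_chosen, ref_logp_rejected, beta=0.1):
    chosen_ratio = logp_chosen - ref_logp_chosen
    rejected_ratio = logp_rejected - ref_logp_rejected
    return -F.logsigmoid(beta * (chosen_ratio - rejected_ratio)).mean()
```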

3. arXiv:2502.17941 [pdf, other] (cs.CV, cs.AI, cs.LG)
Optimal Brain Apoptosis
Authors: Mingyuan Sun, Zheng Fang, Jiaxu Wang, Junjie Jiang, Delei Kong, Chenming Hu, Yuetong Fang, Renjing Xu
Abstract: The increasing complexity and parameter count of Convolutional Neural Networks (CNNs) and Transformers pose challenges in terms of computational efficiency and resource demands. Pruning has been identified as an effective strategy to address these challenges by removing redundant elements such as neurons, channels, or connections, thereby enhancing computational efficiency without heavily compromising performance. This paper builds on the foundational work of Optimal Brain Damage (OBD) by advancing the methodology of parameter importance estimation using the Hessian matrix. Unlike previous approaches that rely on approximations, we introduce Optimal Brain Apoptosis (OBA), a novel pruning method that calculates the Hessian-vector product directly for each parameter. By decomposing the Hessian matrix across network layers and identifying conditions under which inter-layer Hessian submatrices are non-zero, we propose a highly efficient technique for computing the second-order Taylor expansion of parameters. This approach allows for a more precise pruning process, particularly for CNNs and Transformers, as validated in our experiments on VGG19, ResNet32, ResNet50, and ViT-B/16 with the CIFAR10, CIFAR100, and ImageNet datasets. Our code is available at https://github.com/NEU-REAL/OBA.
Submitted 25 February, 2025; originally announced February 2025.
Comments: Accepted to ICLR 2025
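
To make the Hessian-vector product idea concrete, here is a hedged sketch of a second-order Taylor importance score computed via double backward in PyTorch; the paper's layer-wise decomposition and exact scoring rule may differ, this only shows the Hvp mechanics.

```python
import torch

# Sketch of a second-order Taylor pruning score using an exact
# Hessian-vector product (Pearlmutter's trick), in the spirit of the
# OBA abstract. Not the paper's implementation.
def taylor_importance(loss, params):
    """Score each parameter elementwise by 0.5 * theta * (H theta)."""
    grads = torch.autograd.grad(loss, params, create_graph=True)
    v = [p.detach() for p in params]        # direction: the weights themselves
    dot = sum((g * vi).sum() for g, vi in zip(grads, v))
    hvp = torch.autograd.grad(dot, params)  # H @ v via double backward
    return [0.5 * vi * h for vi, h in zip(v, hvp)]
```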

4. arXiv:2502.17759 [pdf] (eess.IV, cs.CV)
Label-free Prediction of Vascular Connectivity in Perfused Microvascular Networks in vitro
Authors: Liang Xu, Pengwu Song, Shilu Zhu, Yang Zhang, Ru Zhang, Zhiyuan Zheng, Qingdong Zhang, Jie Gao, Chen Han, Mingzhai Sun, Peng Yao, Min Ye, Ronald X. Xu
Abstract: Continuous monitoring and in-situ assessment of microvascular connectivity have significant implications for culturing vascularized organoids and optimizing therapeutic strategies. However, commonly used methods for vascular connectivity assessment rely heavily on fluorescent labels that may either raise biocompatibility concerns or interrupt the normal cell growth process. To address this issue, a Vessel Connectivity Network (VC-Net) was developed for label-free assessment of vascular connectivity. To validate the VC-Net, microvascular networks (MVNs) were cultured in vitro and their microscopic images were acquired under different culturing conditions as a training dataset. The VC-Net employs a Vessel Queue Contrastive Learning (VQCL) method and a class imbalance algorithm to address the issues of limited sample size, indistinct class features, and imbalanced class distribution in the dataset. The VC-Net successfully evaluated vascular connectivity with no significant deviation from that measured by fluorescence imaging. In addition, the proposed VC-Net successfully differentiated the connectivity characteristics between normal and tumor-related MVNs. In comparison with those cultured in the regular microenvironment, the average connectivity of MVNs cultured in the tumor-related microenvironment decreased by 30.8%, whereas the non-connected area increased by 37.3%. This study provides a new avenue for label-free and continuous assessment of organoid or tumor vascularization in vitro.
Submitted 24 February, 2025; originally announced February 2025.

5. arXiv:2502.17483 [pdf, other] (eess.SP, cs.LG)
ConSense: Continually Sensing Human Activity with WiFi via Growing and Picking
Authors: Rong Li, Tao Deng, Siwei Feng, Mingjie Sun, Juncheng Jia
Abstract: WiFi-based human activity recognition (HAR) holds significant application potential across various fields. To handle dynamic environments where new activities are continuously introduced, WiFi-based HAR systems must adapt by learning new concepts without forgetting previously learned ones. Furthermore, retaining knowledge of old activities by storing historical exemplars is impractical for WiFi-based HAR due to privacy concerns and the limited storage capacity of edge devices. In this work, we propose ConSense, a lightweight and fast-adapting exemplar-free class-incremental learning framework for WiFi-based HAR. The framework leverages the transformer architecture and involves dynamic model expansion and selective retraining to preserve previously learned knowledge while integrating new information. Specifically, during incremental sessions, small-scale trainable parameters trained specifically on the data of each task are added in the multi-head self-attention layer. In addition, a selective retraining strategy dynamically adjusts the weights of the multilayer perceptron based on the performance stability of neurons across tasks. Rather than training the entire model, the proposed strategies of dynamic model expansion and selective retraining reduce the overall computational load while balancing stability on previous tasks and plasticity on new tasks. Evaluation results on three public WiFi datasets demonstrate that ConSense not only outperforms several competitive approaches but also requires fewer parameters, highlighting its practical utility in class-incremental scenarios for HAR.
Submitted 18 February, 2025; originally announced February 2025.

6. arXiv:2502.17315 [pdf, other] (cs.CL)
HIPPO: Enhancing the Table Understanding Capability of Large Language Models through Hybrid-Modal Preference Optimization
Authors: Zhenghao Liu, Haolan Wang, Xinze Li, Qiushi Xiong, Xiaocui Yang, Yu Gu, Yukun Yan, Qi Shi, Fangfang Li, Ge Yu, Maosong Sun
Abstract: Tabular data contains rich structural semantics and plays a crucial role in organizing and manipulating information. To better capture these structural semantics, this paper introduces the HybrId-modal Preference oPtimizatiOn (HIPPO) model, which represents tables using both text and images, and optimizes MLLMs to effectively learn more comprehensive table information from these multiple modalities. Specifically, HIPPO samples model responses from hybrid-modal table representations and designs a modality-consistent sampling strategy to enhance response diversity and mitigate modality bias during DPO training. Experimental results on table question answering and table fact verification tasks demonstrate the effectiveness of HIPPO, achieving a 4% improvement over various table reasoning models. Further analysis reveals that HIPPO not only enhances reasoning abilities based on unimodal table representations but also facilitates the extraction of crucial and distinct semantics from different modal representations. All data and codes are available at https://github.com/NEUIR/HIPPO.
Submitted 24 February, 2025; originally announced February 2025.

7. arXiv:2502.17297 [pdf, other] (cs.AI)
Benchmarking Retrieval-Augmented Generation in Multi-Modal Contexts
Authors: Zhenghao Liu, Xingsheng Zhu, Tianshuo Zhou, Xinyi Zhang, Xiaoyuan Yi, Yukun Yan, Yu Gu, Ge Yu, Maosong Sun
Abstract: This paper introduces Multi-Modal Retrieval-Augmented Generation (M^2RAG), a benchmark designed to evaluate the effectiveness of Multi-modal Large Language Models (MLLMs) in leveraging knowledge from multi-modal retrieval documents. The benchmark comprises four tasks: image captioning, multi-modal question answering, multi-modal fact verification, and image reranking. All tasks are set in an open-domain setting, requiring RAG models to retrieve query-relevant information from a multi-modal document collection and use it as input context for RAG modeling. To enhance the context utilization capabilities of MLLMs, we also introduce Multi-Modal Retrieval-Augmented Instruction Tuning (MM-RAIT), an instruction tuning method that optimizes MLLMs within multi-modal contexts. Our experiments show that MM-RAIT improves the performance of RAG systems by enabling them to effectively learn from multi-modal contexts. All data and code are available at https://github.com/NEUIR/M2RAG.
Submitted 24 February, 2025; originally announced February 2025.

8. arXiv:2502.15828 [pdf, other] (cs.LG, cs.AI)
A Stronger Mixture of Low-Rank Experts for Fine-Tuning Foundation Models
Authors: Mengyang Sun, Yihao Wang, Tao Feng, Dan Zhang, Yifan Zhu, Jie Tang
Abstract: In order to streamline the fine-tuning of foundation models, Low-Rank Adapters (LoRAs) have been substantially adopted across various fields, including instruction tuning and domain adaptation. The underlying concept of LoRA involves decomposing a full-rank matrix into the product of two lower-rank matrices, which reduces storage consumption and accelerates the training process. Furthermore, to address the limited expressive capacity of LoRA, the Mixture-of-Experts (MoE) paradigm has been introduced to incorporate multiple LoRA adapters. The integration of LoRA experts leads to a visible improvement across several downstream scenarios. However, the mixture of LoRAs (MoE-LoRA) still exhibits low robustness during tuning and inference. Inspired by Riemannian preconditioners, which train LoRA as a sub-space projector, we propose a new training strategy for MoE-LoRA that stabilizes and boosts its feature learning procedure through multi-space projections. Experiments with the SGD and AdamW optimizers demonstrate the effectiveness of our methodology. Source code is available at https://github.com/THUDM/MoELoRA_Riemannian.
Submitted 20 February, 2025; originally announced February 2025.
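
For readers unfamiliar with the setup, a minimal sketch of a mixture-of-LoRA-experts layer follows: a frozen base weight plus a router-weighted sum of low-rank updates B_i(A_i x). Shapes and routing are illustrative assumptions; the paper's Riemannian-preconditioned training strategy itself is not shown.

```python
import torch
import torch.nn as nn

# Illustrative MoE-LoRA layer: frozen base linear plus a softmax-routed
# mixture of low-rank expert updates. Shapes and routing are assumptions.
class MoELoRALinear(nn.Module):
    def __init__(self, d_in, d_out, rank=8, n_experts=4):
        super().__init__()
        self.base = nn.Linear(d_in, d_out, bias=False)
        self.base.weight.requires_grad_(False)         # frozen pretrained weight
        self.A = nn.Parameter(torch.randn(n_experts, rank, d_in) * 0.01)
        self.B = nn.Parameter(torch.zeros(n_experts, d_out, rank))
        self.router = nn.Linear(d_in, n_experts)

    def forward(self, x):                              # x: (batch, d_in)
        gates = torch.softmax(self.router(x), dim=-1)  # (batch, n_experts)
        low_rank = torch.einsum("erd,bd->ber", self.A, x)         # A_i x
        updates = torch.einsum("eor,ber->beo", self.B, low_rank)  # B_i (A_i x)
        return self.base(x) + torch.einsum("be,beo->bo", gates, updates)
```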

9. arXiv:2502.15589 [pdf, other] (cs.CL, cs.AI, cs.IR, cs.LG, cs.MM)
LightThinker: Thinking Step-by-Step Compression
Authors: Jintian Zhang, Yuqi Zhu, Mengshu Sun, Yujie Luo, Shuofei Qiao, Lun Du, Da Zheng, Huajun Chen, Ningyu Zhang
Abstract: Large language models (LLMs) have shown remarkable performance in complex reasoning tasks, but their efficiency is hindered by the substantial memory and computational costs associated with generating lengthy tokens. In this paper, we propose LightThinker, a novel method that enables LLMs to dynamically compress intermediate thoughts during reasoning. Inspired by human cognitive processes, LightThinker compresses verbose thought steps into compact representations and discards the original reasoning chains, thereby significantly reducing the number of tokens stored in the context window. This is achieved by training the model on when and how to perform compression through data construction, mapping hidden states to condensed gist tokens, and creating specialized attention masks. Additionally, we introduce the Dependency (Dep) metric to quantify the degree of compression by measuring the reliance on historical tokens during generation. Extensive experiments on four datasets and two models show that LightThinker reduces peak memory usage and inference time while maintaining competitive accuracy. Our work provides a new direction for improving the efficiency of LLMs in complex reasoning tasks without sacrificing performance. Code will be released at https://github.com/zjunlp/LightThinker.
Submitted 21 February, 2025; originally announced February 2025.
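
The compression loop can be pictured with the toy sketch below. It only illustrates the pattern; `model` and `compress` are hypothetical stand-ins, whereas the actual method maps hidden states to learned gist tokens using specialized attention masks rather than text-level summarization.

```python
# Toy illustration of the compression pattern LightThinker describes:
# after each reasoning step, the verbose thought is replaced in context
# by a short gist so the context window stays small. `model` and
# `compress` are hypothetical interfaces, not the paper's components.
def reason_with_compression(model, compress, question, max_steps=10):
    context = [question]
    for _ in range(max_steps):
        thought = model.next_step("\n".join(context))  # one verbose step
        if model.is_final(thought):
            return thought
        context.append(compress(thought))              # keep only the gist
    return None
```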

10. arXiv:2502.15543 [pdf, other] (cs.CL, cs.AI)
PIP-KAG: Mitigating Knowledge Conflicts in Knowledge-Augmented Generation via Parametric Pruning
Authors: Pengcheng Huang, Zhenghao Liu, Yukun Yan, Xiaoyuan Yi, Hao Chen, Zhiyuan Liu, Maosong Sun, Tong Xiao, Ge Yu, Chenyan Xiong
Abstract: Knowledge-Augmented Generation (KAG) has shown great promise in updating the internal memory of Large Language Models (LLMs) by integrating external knowledge. However, KAG inevitably faces knowledge conflicts when the internal memory contradicts external information. Current approaches to mitigating these conflicts mainly focus on improving external knowledge utilization. However, these methods have shown only limited effectiveness in mitigating the knowledge conflict problem, as internal knowledge continues to influence the generation process of LLMs. In this paper, we propose a ParametrIc Pruning-based Knowledge-Augmented Generation (PIP-KAG) approach, which prunes the internal knowledge of LLMs and incorporates a plug-and-play adaptation module to help LLMs better leverage external sources. Additionally, we construct the CoConflictQA benchmark based on the hallucinations of LLMs to better evaluate contextual faithfulness during question answering. Experimental results on CoConflictQA demonstrate that PIP-KAG significantly reduces knowledge conflicts and improves context fidelity. Notably, PIP-KAG reduces the LLM's parameters by 13%, enhancing parameter efficiency within the KAG framework. All codes are available at https://github.com/OpenBMB/PIP-KAG.
Submitted 21 February, 2025; originally announced February 2025.
Comments: 20 pages, 7 figures, 7 tables
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">20 pages, 7 figures, 7 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.14856">arXiv:2502.14856</a> <span> [<a href="https://arxiv.org/pdf/2502.14856">pdf</a>, <a href="https://arxiv.org/format/2502.14856">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> FR-Spec: Accelerating Large-Vocabulary Language Models via Frequency-Ranked Speculative Sampling </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhao%2C+W">Weilin Zhao</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+T">Tengyu Pan</a>, <a href="/search/cs?searchtype=author&query=Han%2C+X">Xu Han</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yudi Zhang</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+A">Ao Sun</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yuxiang Huang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+K">Kaihuo Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+W">Weilun Zhao</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yuxuan Li</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jianyong Wang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zhiyuan Liu</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+M">Maosong Sun</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.14856v1-abstract-short" style="display: inline;"> Speculative sampling has emerged as an important technique for accelerating the auto-regressive generation process of large language models (LLMs) by utilizing a draft-then-verify mechanism to produce multiple tokens per forward pass. While state-of-the-art speculative sampling methods use only a single layer and a language modeling (LM) head as the draft model to achieve impressive layer compress… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.14856v1-abstract-full').style.display = 'inline'; document.getElementById('2502.14856v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.14856v1-abstract-full" style="display: none;"> Speculative sampling has emerged as an important technique for accelerating the auto-regressive generation process of large language models (LLMs) by utilizing a draft-then-verify mechanism to produce multiple tokens per forward pass. While state-of-the-art speculative sampling methods use only a single layer and a language modeling (LM) head as the draft model to achieve impressive layer compression, their efficiency gains are substantially reduced for large-vocabulary LLMs, such as Llama-3-8B with a vocabulary of 128k tokens. 
Abstract: Speculative sampling has emerged as an important technique for accelerating the auto-regressive generation process of large language models (LLMs) by utilizing a draft-then-verify mechanism to produce multiple tokens per forward pass. While state-of-the-art speculative sampling methods use only a single layer and a language modeling (LM) head as the draft model to achieve impressive layer compression, their efficiency gains are substantially reduced for large-vocabulary LLMs, such as Llama-3-8B with a vocabulary of 128k tokens. To address this, we present FR-Spec, a frequency-ranked speculative sampling framework that optimizes draft candidate selection through vocabulary space compression. By constraining the draft search to a frequency-prioritized token subset, our method reduces LM head computation overhead by 75% while ensuring the equivalence of the final output distribution. Experiments across multiple datasets demonstrate an average speedup of 1.12x over the state-of-the-art speculative sampling method EAGLE-2.
Submitted 20 February, 2025; originally announced February 2025.
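
The vocabulary-restriction step is simple to sketch: the draft model scores only a frequency-ranked token subset, shrinking the LM-head matmul, while the target model still verifies over the full vocabulary so the output distribution is unchanged. In the sketch below, `freq_ids` is an assumed precomputed frequency ranking, not an artifact from the paper.

```python
import torch

# Sketch of a vocabulary-restricted draft step in the spirit of FR-Spec:
# score only the k most frequent tokens instead of all V of them.
# `freq_ids` would come from corpus token-frequency statistics (assumed).
def draft_logits(hidden, lm_head_weight, freq_ids):
    """hidden: (d,); lm_head_weight: (V, d); freq_ids: (k,) with k << V."""
    return lm_head_weight[freq_ids] @ hidden, freq_ids  # k scores, not V
```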
arXiv:2502.14752 [pdf, other] cs.CL cs.LG
TritonBench: Benchmarking Large Language Model Capabilities for Generating Triton Operators
Authors: Jianling Li, Shangzhan Li, Zhenye Gao, Qi Shi, Yuxuan Li, Zefan Wang, Jiacheng Huang, Haojie Wang, Jianrong Wang, Xu Han, Zhiyuan Liu, Maosong Sun
Abstract: Triton, a high-level Python-like language designed for building efficient GPU kernels, is widely adopted in deep learning frameworks due to its portability, flexibility, and accessibility. However, programming and parallel optimization still require considerable trial and error from Triton developers. Despite advances in large language models (LLMs) for conventional code generation, these models struggle to generate accurate, performance-optimized Triton code, as they lack awareness of its specifications and the complexities of GPU programming. More critically, there is an urgent need for systematic evaluations tailored to Triton. In this work, we introduce TritonBench, the first comprehensive benchmark for Triton operator generation. TritonBench features two evaluation channels: a curated set of 184 real-world operators from GitHub and a collection of operators aligned with PyTorch interfaces. Unlike conventional code benchmarks prioritizing functional correctness, TritonBench also profiles efficiency performance on widely deployed GPUs aligned with industry applications. Our study reveals that current state-of-the-art code LLMs struggle to generate efficient Triton operators, highlighting a significant gap in high-performance code generation. TritonBench will be available at https://github.com/thunlp/TritonBench.
Submitted 20 February, 2025; originally announced February 2025.
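For readers unfamiliar with Triton, the kind of operator the benchmark targets looks like the sketch below: a tiled element-wise add kernel (the canonical introductory Triton kernel), paired with the PyTorch reference a correctness channel would compare against. This is a generic illustration, not an operator from the TritonBench suite.

```python
# A toy Triton operator plus a PyTorch reference for a correctness check.
import torch
import triton
import triton.language as tl

@triton.jit
def add_kernel(x_ptr, y_ptr, out_ptr, n_elements, BLOCK: tl.constexpr):
    pid = tl.program_id(axis=0)
    offsets = pid * BLOCK + tl.arange(0, BLOCK)
    mask = offsets < n_elements              # guard the ragged final block
    x = tl.load(x_ptr + offsets, mask=mask)
    y = tl.load(y_ptr + offsets, mask=mask)
    tl.store(out_ptr + offsets, x + y, mask=mask)

def add(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
    out = torch.empty_like(x)
    grid = (triton.cdiv(x.numel(), 1024),)   # one program per 1024-element tile
    add_kernel[grid](x, y, out, x.numel(), BLOCK=1024)
    return out

x = torch.randn(10_000, device="cuda")
y = torch.randn(10_000, device="cuda")
assert torch.allclose(add(x, y), x + y)  # functional check; the benchmark also times it
```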
arXiv:2502.12974 [pdf, other] cs.IR
Learning More Effective Representations for Dense Retrieval through Deliberate Thinking Before Search
Authors: Yifan Ji, Zhipeng Xu, Zhenghao Liu, Yukun Yan, Shi Yu, Yishan Li, Zhiyuan Liu, Yu Gu, Ge Yu, Maosong Sun
Abstract: Recent dense retrievers usually thrive on the emergent capabilities of Large Language Models (LLMs), using them to encode queries and documents into an embedding space for retrieval. These LLM-based dense retrievers have shown promising performance across various retrieval scenarios. However, relying on a single embedding to represent documents proves less effective in capturing different perspectives of documents for matching. In this paper, we propose the Deliberate Thinking based Dense Retriever (DEBATER), which enhances these LLM-based retrievers by enabling them to learn more effective document representations through a step-by-step thinking process. DEBATER introduces the Chain-of-Deliberation mechanism to iteratively optimize document representations using a continuous chain of thought. To consolidate information from various thinking steps, DEBATER also incorporates the Self-Distillation mechanism, which identifies the most informative thinking steps and integrates them into a unified text embedding. Experimental results show that DEBATER significantly outperforms existing methods across several retrieval benchmarks, demonstrating superior accuracy and robustness. All code is available at https://github.com/OpenBMB/DEBATER.
Submitted 18 February, 2025; originally announced February 2025.
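A rough sketch of the two ideas in the abstract above, under simplifying assumptions: `step_embeds` stands in for the hidden states an LLM emits at each deliberation step, and the softmax weighting is a stand-in for the self-distillation step selection, not the paper's actual mechanism.

```python
# Fuse several "deliberation step" embeddings into one document embedding,
# upweighting the steps that look most informative for matching the query.
import torch
import torch.nn.functional as F

def fuse_deliberation_steps(step_embeds: torch.Tensor, query_embed: torch.Tensor):
    """step_embeds: (T, d) embeddings from T thinking steps; query_embed: (d,)."""
    steps = F.normalize(step_embeds, dim=-1)
    q = F.normalize(query_embed, dim=-1)
    scores = steps @ q                        # informativeness proxy per step
    weights = torch.softmax(scores, dim=0)
    return (weights[:, None] * steps).sum(0)  # unified text embedding

doc_steps = torch.randn(4, 768)               # 4 chain-of-deliberation steps
query = torch.randn(768)
doc_embed = fuse_deliberation_steps(doc_steps, query)
print(float(F.cosine_similarity(doc_embed, query, dim=0)))
```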
arXiv:2502.12631 [pdf, other] cs.LG cs.AI
Score-Based Diffusion Policy Compatible with Reinforcement Learning via Optimal Transport
Authors: Mingyang Sun, Pengxiang Ding, Weinan Zhang, Donglin Wang
Abstract: Diffusion policies have shown promise in learning complex behaviors from demonstrations, particularly for tasks requiring precise control and long-term planning. However, they face challenges in robustness when encountering distribution shifts. This paper explores improving diffusion-based imitation learning (IL) models through online interactions with the environment. We propose OTPR (Optimal Transport-guided score-based diffusion Policy for Reinforcement learning fine-tuning), a novel method that integrates diffusion policies with RL using optimal transport theory. OTPR leverages the Q-function as a transport cost and views the policy as an optimal transport map, enabling efficient and stable fine-tuning. Moreover, we introduce masked optimal transport to guide state-action matching using expert keypoints, and a compatibility-based resampling strategy to enhance training stability. Experiments on three simulation tasks demonstrate OTPR's superior performance and robustness compared to existing methods, especially in complex and sparse-reward environments. In sum, OTPR provides an effective framework for combining IL and RL, achieving versatile and reliable policy learning. The code will be released at https://github.com/Sunmmyy/OTPR.git.
Submitted 21 February, 2025; v1 submitted 18 February, 2025; originally announced February 2025.
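The "Q-function as transport cost" idea can be made concrete with a small entropic OT solve. The sketch below uses plain Sinkhorn iterations over a toy cost matrix built from hypothetical Q-values; it illustrates the coupling the abstract describes, not the paper's implementation.

```python
# Entropic optimal transport with -Q(s, a) as the cost, via Sinkhorn iterations.
import numpy as np

def sinkhorn(cost, eps=0.1, iters=200):
    K = np.exp(-cost / eps)
    a = np.full(cost.shape[0], 1 / cost.shape[0])   # uniform source marginal
    b = np.full(cost.shape[1], 1 / cost.shape[1])   # uniform target marginal
    u, v = np.ones_like(a), np.ones_like(b)
    for _ in range(iters):
        u = a / (K @ v)
        v = b / (K.T @ u)
    return np.diag(u) @ K @ np.diag(v)              # transport plan

# Hypothetical Q-values: policy samples (rows) vs. expert keypoints (cols).
Q = np.random.rand(8, 5)
plan = sinkhorn(-Q)   # high-Q pairings become cheap, so they receive more mass
print(plan.shape, plan.sum())   # (8, 5), total mass ~1.0
```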
arXiv:2502.12150 [pdf, other] cs.CL
Idiosyncrasies in Large Language Models
Authors: Mingjie Sun, Yida Yin, Zhiqiu Xu, J. Zico Kolter, Zhuang Liu
Abstract: In this work, we unveil and study idiosyncrasies in Large Language Models (LLMs) -- unique patterns in their outputs that can be used to distinguish the models. To do so, we consider a simple classification task: given a particular text output, the objective is to predict the source LLM that generated the text. We evaluate this synthetic task across various groups of LLMs and find that simply fine-tuning existing text embedding models on LLM-generated texts yields excellent classification accuracy. Notably, we achieve 97.1% accuracy on held-out validation data in the five-way classification problem involving ChatGPT, Claude, Grok, Gemini, and DeepSeek. Our further investigation reveals that these idiosyncrasies are rooted in word-level distributions. These patterns persist even when the texts are rewritten, translated, or summarized by an external LLM, suggesting that they are also encoded in the semantic content. Additionally, we leverage LLMs as judges to generate detailed, open-ended descriptions of each model's idiosyncrasies. Finally, we discuss the broader implications of our findings, particularly for training on synthetic data and inferring model similarity. Code is available at https://github.com/locuslab/llm-idiosyncrasies.
Submitted 17 February, 2025; originally announced February 2025.
Comments: Website at https://eric-mingjie.github.io/llm-idiosyncrasies/index.html
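The classification task itself is easy to reproduce in miniature. The paper fine-tunes text embedding models; as a simpler stand-in, the sketch below fits a linear classifier on fixed embeddings, where `embed()` is a hypothetical placeholder for a real embedding model.

```python
# Five-way "which LLM wrote this?" classification on text embeddings.
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

def embed(texts):
    # Placeholder for a real text-embedding model (random features here).
    rng = np.random.default_rng(0)
    return rng.normal(size=(len(texts), 256))

texts = [f"response {i}" for i in range(500)]
labels = np.random.default_rng(1).integers(0, 5, size=500)  # 5 source LLMs

X_tr, X_te, y_tr, y_te = train_test_split(embed(texts), labels, test_size=0.2)
clf = LogisticRegression(max_iter=1000).fit(X_tr, y_tr)
# ~Chance with random features; the paper reports 97.1% with real embeddings.
print("held-out accuracy:", clf.score(X_te, y_te))
```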
arXiv:2502.12085 [pdf, other] cs.LG cs.CL
APB: Accelerating Distributed Long-Context Inference by Passing Compressed Context Blocks across GPUs
Authors: Yuxiang Huang, Mingye Li, Xu Han, Chaojun Xiao, Weilin Zhao, Sun Ao, Hao Zhou, Jie Zhou, Zhiyuan Liu, Maosong Sun
Abstract: While long-context inference is crucial for advancing large language model (LLM) applications, its prefill speed remains a significant bottleneck. Current approaches, including sequence parallelism strategies and compute reduction through approximate attention mechanisms, still fall short of delivering optimal inference efficiency. This hinders scaling the inputs to longer sequences and processing long-context queries in a timely manner. To address this, we introduce APB, an efficient long-context inference framework that leverages multi-host approximate attention to enhance prefill speed by reducing compute and enhancing parallelism simultaneously. APB introduces a communication mechanism for essential key-value pairs within a sequence parallelism framework, enabling faster inference while maintaining task performance. We implement APB by incorporating a tailored FlashAttn kernel alongside optimized distribution strategies, supporting diverse models and parallelism configurations. APB achieves speedups of up to 9.2x, 4.2x, and 1.6x compared with FlashAttn, RingAttn, and StarAttn, respectively, without any observable task performance degradation. We provide the implementation and experiment code of APB at https://github.com/thunlp/APB.
Submitted 17 February, 2025; originally announced February 2025.
Comments: Preprint
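A single-process sketch of the intuition (the real system is distributed and kernel-level): each sequence-parallel block keeps only its most "essential" key-value pairs before passing them on. The key-norm importance score below is an assumed proxy, not APB's actual selection criterion.

```python
# Compress a KV block to a fixed budget before handing it to the next host.
import torch

def compress_kv(keys: torch.Tensor, values: torch.Tensor, budget: int):
    """keys/values: (seq, d). Keep the `budget` highest-scoring positions."""
    scores = keys.norm(dim=-1)                    # assumed importance proxy
    idx = torch.topk(scores, budget).indices.sort().values  # keep original order
    return keys[idx], values[idx]

# Four "hosts" each hold a chunk of a long context; each would forward its
# compressed KV block so later chunks can attend to earlier context cheaply.
passed = []
for _ in range(4):
    k, v = torch.randn(2048, 128), torch.randn(2048, 128)
    passed.append(compress_kv(k, v, budget=256))  # 8x smaller than the full block
print([p[0].shape for p in passed])
```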
arXiv:2502.11546 [pdf, other] cs.CL
DCAD-2000: A Multilingual Dataset across 2000+ Languages with Data Cleaning as Anomaly Detection
Authors: Yingli Shen, Wen Lai, Shuo Wang, Xueren Zhang, Kangyang Luo, Alexander Fraser, Maosong Sun
Abstract: The rapid development of multilingual large language models (LLMs) highlights the need for high-quality, diverse, and clean multilingual datasets. In this paper, we introduce DCAD-2000 (Data Cleaning as Anomaly Detection), a large-scale multilingual corpus built using newly extracted Common Crawl data and existing multilingual datasets. DCAD-2000 includes over 2,282 languages, 46.72TB of data, and 8.63 billion documents, spanning 155 high- and medium-resource languages and 159 writing scripts. To overcome the limitations of current data cleaning methods, which rely on manual heuristic thresholds, we propose reframing data cleaning as an anomaly detection task. This dynamic filtering approach significantly enhances data quality by identifying and removing noisy or anomalous content. We evaluate the quality of DCAD-2000 on the FineTask benchmark, demonstrating substantial improvements in multilingual dataset quality and task performance.
Submitted 17 February, 2025; originally announced February 2025.
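"Data cleaning as anomaly detection" has a natural minimal form: featurize documents and let an off-the-shelf detector flag outliers instead of hand-tuning thresholds. The feature choices below are illustrative assumptions, not the paper's exact feature set.

```python
# Flag anomalous documents with an Isolation Forest instead of fixed heuristics.
import numpy as np
from sklearn.ensemble import IsolationForest

def doc_features(doc: str) -> list:
    words = doc.split()
    n_chars = max(len(doc), 1)
    return [
        len(words),                                        # document length
        sum(len(w) for w in words) / max(len(words), 1),   # mean word length
        sum(c.isalpha() for c in doc) / n_chars,           # alphabetic ratio
        sum(c.isdigit() for c in doc) / n_chars,           # digit ratio
    ]

docs = ["A normal sentence about language models."] * 200 + ["@@@@ 0101010101 @@@@"]
X = np.array([doc_features(d) for d in docs])
flags = IsolationForest(contamination=0.01, random_state=0).fit_predict(X)
print("flagged as anomalous:", [d for d, f in zip(docs, flags) if f == -1])
```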
arXiv:2502.11471 [pdf, other] cs.CL cs.IR
GLTW: Joint Improved Graph Transformer and LLM via Three-Word Language for Knowledge Graph Completion
Authors: Kangyang Luo, Yuzhuo Bai, Cheng Gao, Shuzheng Si, Yingli Shen, Zhu Liu, Zhitong Wang, Cunliang Kong, Wenhao Li, Yufei Huang, Ye Tian, Xuantang Xiong, Lei Han, Maosong Sun
Abstract: Knowledge Graph Completion (KGC), which aims to infer missing or incomplete facts, is a crucial task for KGs. However, integrating the vital structural information of KGs into Large Language Models (LLMs) and outputting predictions deterministically remains challenging. To address this, we propose a new method called GLTW, which encodes the structural information of KGs and merges it with LLMs to enhance KGC performance. Specifically, we introduce an improved Graph Transformer (iGT) that effectively encodes subgraphs with both local and global structural information and inherits the characteristics of language models, bypassing training from scratch. Also, we develop a subgraph-based multi-classification training objective, using all entities within the KG as classification objects, to boost learning efficiency. Importantly, we combine iGT with an LLM that takes KG language prompts as input. Our extensive experiments on various KG datasets show that GLTW achieves significant performance gains compared to SOTA baselines.
Submitted 17 February, 2025; originally announced February 2025.
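The multi-classification objective mentioned above reduces to a familiar pattern: score every KG entity as a class and train with cross-entropy. A tiny sketch, with placeholder dimensions and a random stand-in for the iGT/LLM representation:

```python
# Subgraph-based multi-classification: all KG entities as classes.
import torch
import torch.nn as nn

n_entities, d = 10_000, 256
encoder_out = torch.randn(32, d)                 # stand-in query representations
target_entities = torch.randint(0, n_entities, (32,))

entity_classifier = nn.Linear(d, n_entities)     # one logit per KG entity
loss = nn.functional.cross_entropy(entity_classifier(encoder_out), target_entities)
loss.backward()
print(float(loss))
```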
arXiv:2502.11380 [pdf, other] cs.CL
Exploring the Small World of Word Embeddings: A Comparative Study on Conceptual Spaces from LLMs of Different Scales
Authors: Zhu Liu, Ying Liu, KangYang Luo, Cunliang Kong, Maosong Sun
Abstract: A conceptual space represents concepts as nodes and semantic relatedness as edges. Word embeddings, combined with a similarity metric, provide an effective approach to constructing such a space. Typically, embeddings are derived from traditional distributed models or encoder-only pretrained models, whose objectives directly capture the meaning of the current token. In contrast, decoder-only models, including large language models (LLMs), predict the next token, making their embeddings less directly tied to the current token's semantics. Moreover, comparative studies on LLMs of different scales remain underexplored. In this paper, we construct a conceptual space using word embeddings from LLMs of varying scales and comparatively analyze their properties. We establish a network based on a linguistic typology-inspired connectivity hypothesis, examine global statistical properties, and compare LLMs of varying scales. Locally, we analyze conceptual pairs, WordNet relations, and a cross-lingual semantic network for qualitative words. Our results indicate that the constructed space exhibits small-world properties, characterized by a high clustering coefficient and short path lengths. Larger LLMs generate more intricate spaces, with longer paths reflecting richer relational structures and connections. Furthermore, the network serves as an efficient bridge for cross-lingual semantic mapping.
Submitted 16 February, 2025; originally announced February 2025.
Comments: Paper under review
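The construction and the small-world measurements are both standard graph operations. A sketch with random vectors standing in for LLM word embeddings, and an assumed similarity threshold for edges:

```python
# Build a similarity graph over words, then check small-world statistics.
import numpy as np
import networkx as nx

rng = np.random.default_rng(0)
words = [f"w{i}" for i in range(200)]
emb = rng.normal(size=(200, 64))
emb /= np.linalg.norm(emb, axis=1, keepdims=True)

sim = emb @ emb.T
G = nx.Graph()
G.add_nodes_from(words)
for i in range(200):
    for j in range(i + 1, 200):
        if sim[i, j] > 0.25:                 # assumed threshold: similar = edge
            G.add_edge(words[i], words[j])

# Small-world signature: high clustering plus short average paths.
giant = G.subgraph(max(nx.connected_components(G), key=len))
print("clustering:", nx.average_clustering(giant))
print("avg shortest path:", nx.average_shortest_path_length(giant))
```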
arXiv:2502.10362 [pdf, other] cs.SD eess.AS
CLaMP 3: Universal Music Information Retrieval Across Unaligned Modalities and Unseen Languages
Authors: Shangda Wu, Zhancheng Guo, Ruibin Yuan, Junyan Jiang, Seungheon Doh, Gus Xia, Juhan Nam, Xiaobing Li, Feng Yu, Maosong Sun
Abstract: CLaMP 3 is a unified framework developed to address challenges of cross-modal and cross-lingual generalization in music information retrieval. Using contrastive learning, it aligns all major music modalities--including sheet music, performance signals, and audio recordings--with multilingual text in a shared representation space, enabling retrieval across unaligned modalities with text as a bridge. It features a multilingual text encoder adaptable to unseen languages, exhibiting strong cross-lingual generalization. Leveraging retrieval-augmented generation, we curated M4-RAG, a web-scale dataset consisting of 2.31 million music-text pairs. This dataset is enriched with detailed metadata that represents a wide array of global musical traditions. To advance future research, we release WikiMT-X, a benchmark comprising 1,000 triplets of sheet music, audio, and richly varied text descriptions. Experiments show that CLaMP 3 achieves state-of-the-art performance on multiple MIR tasks, significantly surpassing previous strong baselines and demonstrating excellent generalization in multimodal and multilingual music contexts.
Submitted 17 February, 2025; v1 submitted 14 February, 2025; originally announced February 2025.
Comments: 20 pages, 8 figures, 12 tables
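The contrastive alignment described above follows the familiar CLIP-style recipe: matched (music, text) pairs are pulled together in a shared space. A minimal sketch of that loss, with random tensors standing in for encoder outputs:

```python
# Symmetric contrastive (InfoNCE) loss for aligning two modalities.
import torch
import torch.nn.functional as F

def contrastive_loss(music_emb, text_emb, temperature=0.07):
    m = F.normalize(music_emb, dim=-1)
    t = F.normalize(text_emb, dim=-1)
    logits = m @ t.T / temperature           # (B, B) similarity matrix
    targets = torch.arange(len(m))           # i-th music matches i-th text
    return (F.cross_entropy(logits, targets) +
            F.cross_entropy(logits.T, targets)) / 2

music = torch.randn(16, 512)  # stand-in: sheet music / performance / audio encoder
text = torch.randn(16, 512)   # stand-in: multilingual text encoder
print(float(contrastive_loss(music, text)))
```

Because every modality is aligned to text rather than to each other, text acts as the bridge for retrieval between modalities that were never paired directly.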
arXiv:2502.07340 [pdf, other] cs.CL cs.AI
Aligning Large Language Models to Follow Instructions and Hallucinate Less via Effective Data Filtering
Authors: Shuzheng Si, Haozhe Zhao, Gang Chen, Cheng Gao, Yuzhuo Bai, Zhitong Wang, Kaikai An, Kangyang Luo, Chen Qian, Fanchao Qi, Baobao Chang, Maosong Sun
Abstract: Training LLMs on data containing unfamiliar knowledge during the instruction tuning stage can encourage hallucinations. To address this challenge, we introduce NOVA, a novel framework designed to identify high-quality data that aligns well with the LLM's learned knowledge to reduce hallucinations. NOVA includes Internal Consistency Probing (ICP) and Semantic Equivalence Identification (SEI) to measure how familiar the LLM is with instruction data. Specifically, ICP evaluates the LLM's understanding of the given instruction by calculating the tailored consistency among multiple self-generated responses. SEI further assesses the familiarity of the LLM with the target response by comparing it to the generated responses, using the proposed semantic clustering and well-designed voting strategy. Finally, to ensure the quality of selected samples, we introduce an expert-aligned reward model, considering characteristics beyond just familiarity. By considering data quality and avoiding unfamiliar data, we can utilize the selected data to effectively align LLMs to follow instructions and hallucinate less.
Submitted 16 February, 2025; v1 submitted 11 February, 2025; originally announced February 2025.
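Internal Consistency Probing can be sketched as "sample several responses, score how much they agree." Below, `generate` and `embed` are hypothetical stand-ins for the LLM and an embedding model, and mean pairwise cosine similarity is an assumed consistency measure, not the paper's tailored one.

```python
# Consistency probing: low agreement across samples -> unfamiliar instruction.
import numpy as np

def generate(instruction: str, n: int) -> list:
    return [f"sampled answer {i} to: {instruction}" for i in range(n)]  # placeholder

def embed(texts: list) -> np.ndarray:
    rng = np.random.default_rng(0)
    v = rng.normal(size=(len(texts), 128))
    return v / np.linalg.norm(v, axis=1, keepdims=True)

def internal_consistency(instruction: str, n: int = 8) -> float:
    e = embed(generate(instruction, n))
    sims = e @ e.T
    return float(sims[np.triu_indices(n, k=1)].mean())  # mean pairwise agreement

# Instructions scoring below some threshold would be filtered from tuning data.
print(internal_consistency("Explain the rules of Go."))
```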
arXiv:2502.06257 [pdf, other] cs.CL cs.AI
K-ON: Stacking Knowledge On the Head Layer of Large Language Model
Authors: Lingbing Guo, Yichi Zhang, Zhongpu Bo, Zhuo Chen, Mengshu Sun, Zhiqiang Zhang, Wen Zhang, Huajun Chen
Abstract: Recent advancements in large language models (LLMs) have significantly improved various natural language processing (NLP) tasks. Typically, LLMs are trained to predict the next token, aligning well with many NLP tasks. However, in knowledge graph (KG) scenarios, entities are the fundamental units and identifying an entity requires at least several tokens. This leads to a granularity mismatch between KGs and natural languages. To address this issue, we propose K-ON, which integrates KG knowledge into the LLM by employing multiple head layers for next k-step prediction. K-ON can not only generate entity-level results in one step, but also enable contrastive loss against entities, which is the most powerful tool in KG representation learning. Experimental results show that K-ON outperforms state-of-the-art methods that incorporate text and even other modalities.
Submitted 10 February, 2025; originally announced February 2025.
Comments: AAAI 2025 (Oral)
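The head-stacking idea has a simple skeleton: k parallel heads predict the next k tokens from one hidden state, so a multi-token entity name can be scored in a single step. The sketch below omits the paper's conditioning between heads; sizes are illustrative.

```python
# k parallel LM heads: one forward pass scores a whole k-token entity.
import torch
import torch.nn as nn

class KStepHeads(nn.Module):
    def __init__(self, d_model: int, vocab: int, k: int):
        super().__init__()
        self.heads = nn.ModuleList([nn.Linear(d_model, vocab) for _ in range(k)])

    def forward(self, h: torch.Tensor) -> torch.Tensor:
        # h: (batch, d_model) -> (batch, k, vocab): one distribution per step.
        return torch.stack([head(h) for head in self.heads], dim=1)

model = KStepHeads(d_model=768, vocab=32_000, k=4)
logits = model(torch.randn(2, 768))
entity_tokens = torch.tensor([[11, 42, 7, 99], [3, 5, 8, 13]])  # 4-token entities
# Entity-level score = sum of per-position log-probs, computed in one step;
# these scores can then feed an entity-level contrastive loss.
logp = logits.log_softmax(-1).gather(-1, entity_tokens.unsqueeze(-1)).squeeze(-1).sum(-1)
print(logp)
```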
arXiv:2502.05573 [pdf, other] cs.MA cs.AI cs.LG cs.RO
Low-Rank Agent-Specific Adaptation (LoRASA) for Multi-Agent Policy Learning
Authors: Beining Zhang, Aditya Kapoor, Mingfei Sun
Abstract: Multi-agent reinforcement learning (MARL) often relies on parameter sharing (PS) to scale efficiently. However, purely shared policies can stifle each agent's unique specialization, reducing overall performance in heterogeneous environments. We propose Low-Rank Agent-Specific Adaptation (LoRASA), a novel approach that treats each agent's policy as a specialized "task" fine-tuned from a shared backbone. Drawing inspiration from parameter-efficient transfer methods, LoRASA appends small, low-rank adaptation matrices to each layer of the shared policy, naturally inducing parameter-space sparsity that promotes both specialization and scalability. We evaluate LoRASA on challenging benchmarks including the StarCraft Multi-Agent Challenge (SMAC) and Multi-Agent MuJoCo (MAMuJoCo), implementing it atop widely used algorithms such as MAPPO and A2PO. Across diverse tasks, LoRASA matches or outperforms existing baselines while reducing memory and computational overhead. Ablation studies on adapter rank, placement, and timing validate the method's flexibility and efficiency. Our results suggest LoRASA's potential to establish a new norm for MARL policy parameterization: combining a shared foundation for coordination with low-rank agent-specific refinements for individual specialization.
Submitted 8 February, 2025; originally announced February 2025.
Comments: 31 pages, 20 figures, 13 tables
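The adapter structure is the standard LoRA pattern applied per agent: a frozen shared layer plus a rank-r correction indexed by agent ID. A minimal sketch with toy shapes, not the authors' code:

```python
# A shared linear layer with per-agent low-rank adapters (LoRA-style).
import torch
import torch.nn as nn

class AgentLoRALinear(nn.Module):
    def __init__(self, d_in: int, d_out: int, n_agents: int, rank: int = 4):
        super().__init__()
        self.shared = nn.Linear(d_in, d_out)        # shared backbone layer
        self.shared.weight.requires_grad_(False)    # frozen; only adapters train
        self.A = nn.Parameter(torch.zeros(n_agents, rank, d_in))
        self.B = nn.Parameter(torch.randn(n_agents, d_out, rank) * 0.01)

    def forward(self, x: torch.Tensor, agent: int) -> torch.Tensor:
        delta = self.B[agent] @ self.A[agent]       # (d_out, d_in), rank <= r
        return self.shared(x) + x @ delta.T         # shared output + agent tweak

layer = AgentLoRALinear(64, 32, n_agents=8)
out = layer(torch.randn(5, 64), agent=3)            # agent-3 specialized forward
print(out.shape)                                     # torch.Size([5, 32])
```

Since A starts at zero, every agent initially behaves exactly like the shared policy, and specialization grows only as the adapters are trained.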
arXiv:2502.05478 [pdf, other] cs.CL
OntoTune: Ontology-Driven Self-training for Aligning Large Language Models
Authors: Zhiqiang Liu, Chengtao Gan, Junjie Wang, Yichi Zhang, Zhongpu Bo, Mengshu Sun, Huajun Chen, Wen Zhang
Abstract: Existing domain-specific Large Language Models (LLMs) are typically developed by fine-tuning general-purpose LLMs with large-scale domain-specific corpora. However, training on large-scale corpora often fails to effectively organize the domain knowledge of LLMs, leading to fragmented understanding. Inspired by how humans connect concepts and organize knowledge through mind maps, we aim to emulate this approach by using an ontology with hierarchical conceptual knowledge to reorganize the LLM's domain knowledge. From this perspective, we propose an ontology-driven self-training framework called OntoTune, which aims to align LLMs with the ontology through in-context learning, enabling the generation of responses guided by the ontology. We leverage in-context learning to identify whether the LLM has acquired a specific concept's ontology knowledge, and select the entries not yet mastered by the LLM as the training set to further align the LLM with the ontology. Compared to existing domain LLMs based on newly collected large-scale domain-specific corpora, our OntoTune, which relies on the existing, long-term developed ontology and the LLM itself, significantly reduces data maintenance costs and offers improved generalization ability. We conduct our study in the medical domain to evaluate the effectiveness of OntoTune, utilizing a standardized medical ontology, SNOMED CT, as our ontology source. Experimental results demonstrate that OntoTune achieves state-of-the-art performance in both the in-ontology task of hypernym discovery and the out-of-ontology task of medical domain QA. Moreover, compared to the latest direct ontology injection method TaxoLLaMA, OntoTune better preserves the original knowledge of the LLM. The code and data are available at https://github.com/zjukg/OntoTune.
Submitted 8 February, 2025; originally announced February 2025.
Comments: Accepted by WWW25
arXiv:2502.04864 [pdf, other] cs.MA cs.AI cs.LG cs.RO
TAR^2: Temporal-Agent Reward Redistribution for Optimal Policy Preservation in Multi-Agent Reinforcement Learning
Authors: Aditya Kapoor, Kale-ab Tessera, Mayank Baranwal, Harshad Khadilkar, Stefano Albrecht, Mingfei Sun
Abstract: In cooperative multi-agent reinforcement learning (MARL), learning effective policies is challenging when global rewards are sparse and delayed. This difficulty arises from the need to assign credit across both agents and time steps, a problem that existing methods often fail to address in episodic, long-horizon tasks. We propose Temporal-Agent Reward Redistribution (TAR^2), a novel approach that decomposes sparse global rewards into agent-specific, time-step-specific components, thereby providing more frequent and accurate feedback for policy learning. Theoretically, we show that TAR^2 (i) aligns with potential-based reward shaping, preserving the same optimal policies as the original environment, and (ii) maintains policy gradient update directions identical to those under the original sparse reward, ensuring unbiased credit signals. Empirical results on two challenging benchmarks, SMACLite and Google Research Football, demonstrate that TAR^2 significantly stabilizes and accelerates convergence, outperforming strong baselines like AREL and STAS in both learning speed and final performance. These findings establish TAR^2 as a principled and practical solution for agent-temporal credit assignment in sparse-reward multi-agent systems.
Submitted 7 February, 2025; originally announced February 2025.
Comments: 23 pages, 5 figures, 4 tables
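The structural constraint behind the optimal-policy guarantee is easy to show in isolation: redistributed per-agent, per-step credits must sum back to the episodic return. A sketch with placeholder credit scores (a learned model would produce `raw_scores` in the real method):

```python
# Redistribute a sparse episodic return into dense (t, agent) rewards
# whose total is exactly the original return.
import numpy as np

def redistribute(global_return: float, raw_scores: np.ndarray) -> np.ndarray:
    """raw_scores: (T, n_agents) unnormalized credit logits from a model."""
    w = np.exp(raw_scores - raw_scores.max())
    w /= w.sum()                      # softmax over all (t, agent) cells
    return global_return * w          # dense rewards summing to the return

T, n_agents = 50, 3
scores = np.random.default_rng(0).normal(size=(T, n_agents))
dense = redistribute(global_return=10.0, raw_scores=scores)
assert np.isclose(dense.sum(), 10.0)  # total credit is preserved
print(dense.shape)                    # (50, 3)
```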
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03954v1-abstract-full').style.display = 'none'; document.getElementById('2502.03954v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by WWW 2025 short</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.03843">arXiv:2502.03843</a> <span> [<a href="https://arxiv.org/pdf/2502.03843">pdf</a>, <a href="https://arxiv.org/format/2502.03843">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Improving Natural Language Understanding for LLMs via Large-Scale Instruction Synthesis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yuan%2C+L">Lin Yuan</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+J">Jun Xu</a>, <a href="/search/cs?searchtype=author&query=Gui%2C+H">Honghao Gui</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+M">Mengshu Sun</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zhiqiang Zhang</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+L">Lei Liang</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+J">Jun Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.03843v1-abstract-short" style="display: inline;"> High-quality, large-scale instructions are crucial for aligning large language models (LLMs), however, there is a severe shortage of instruction in the field of natural language understanding (NLU). Previous works on constructing NLU instructions mainly focus on information extraction (IE), neglecting tasks such as machine reading comprehension, question answering, and text classification. Further… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03843v1-abstract-full').style.display = 'inline'; document.getElementById('2502.03843v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.03843v1-abstract-full" style="display: none;"> High-quality, large-scale instructions are crucial for aligning large language models (LLMs), however, there is a severe shortage of instruction in the field of natural language understanding (NLU). Previous works on constructing NLU instructions mainly focus on information extraction (IE), neglecting tasks such as machine reading comprehension, question answering, and text classification. Furthermore, the lack of diversity in the data has led to a decreased generalization ability of trained LLMs in other NLU tasks and a noticeable decline in the fundamental model's general capabilities. 
To address this issue, we propose Hum, a large-scale, high-quality synthetic instruction corpus for NLU tasks, designed to enhance the NLU capabilities of LLMs. Specifically, Hum includes IE (both closed and open IE), machine reading comprehension, text classification, and instruction-generalist tasks, thereby enriching task diversity. Additionally, we introduce a human-LLM collaborative mechanism for synthesizing instructions, which enriches instruction diversity by incorporating guidelines, preference rules, and format variants. We conduct extensive experiments on 5 NLU tasks and 28 general-capability evaluation datasets for LLMs. Experimental results show that Hum enhances the NLU capabilities of six LLMs by an average of 3.1%, with no significant decline observed in other general capabilities. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by AAAI 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.03356">arXiv:2502.03356</a> <span> [<a href="https://arxiv.org/pdf/2502.03356">pdf</a>, <a href="https://arxiv.org/format/2502.03356">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Inverse Mixed Strategy Games with Generative Trajectory Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sun%2C+M+M">Max Muchen Sun</a>, <a href="/search/cs?searchtype=author&query=Trautman%2C+P">Pete Trautman</a>, <a href="/search/cs?searchtype=author&query=Murphey%2C+T">Todd Murphey</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Game-theoretic models are effective tools for modeling multi-agent interactions, especially when robots need to coordinate with humans. However, applying these models requires inferring their specifications from observed behaviors -- a challenging task known as the inverse game problem.
Existing inverse game approaches often struggle to account for behavioral uncertainty and measurement noise, and to leverage both offline and online data. To address these limitations, we propose an inverse game method that integrates a generative trajectory model into a differentiable mixed-strategy game framework. By representing the mixed strategy with a conditional variational autoencoder (CVAE), our method can infer high-dimensional, multi-modal behavior distributions from noisy measurements while adapting in real time to new observations. We extensively evaluate our method in a simulated navigation benchmark, where the observations are generated by an unknown game model. Despite the model mismatch, our method can infer Nash-optimal actions comparable to those of the ground-truth model and the oracle inverse game baseline, even in the presence of uncertain agent objectives and noisy measurements. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to ICRA 2025. 8 pages, 4 figures</span> </p> </li>
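<p class="is-size-7">A toy sketch of the CVAE-as-mixed-strategy representation (architecture sizes, dimensions, and class names are illustrative assumptions, not the authors' model): sampling several latent draws conditioned on the same observation yields a multi-modal distribution over candidate trajectories.</p>
<pre><code># Toy conditional decoder: p(trajectory | observation) as a mixed strategy (illustrative).
import torch
import torch.nn as nn

class TrajectoryCVAE(nn.Module):
    def __init__(self, obs_dim=16, latent_dim=8, horizon=20, act_dim=2):
        super().__init__()
        self.latent_dim, self.horizon, self.act_dim = latent_dim, horizon, act_dim
        self.decoder = nn.Sequential(
            nn.Linear(obs_dim + latent_dim, 128), nn.ReLU(),
            nn.Linear(128, horizon * act_dim))

    def sample(self, obs: torch.Tensor, n: int) -> torch.Tensor:
        """Draw n trajectories for one observation of shape [1, obs_dim]."""
        z = torch.randn(n, self.latent_dim)                   # latent draws = strategy randomness
        out = self.decoder(torch.cat([obs.expand(n, -1), z], dim=-1))
        return out.view(n, self.horizon, self.act_dim)

futures = TrajectoryCVAE().sample(torch.randn(1, 16), n=32)  # 32 candidate futures
</code></pre>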
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.01563">arXiv:2502.01563</a> <span> [<a href="https://arxiv.org/pdf/2502.01563">pdf</a>, <a href="https://arxiv.org/format/2502.01563">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Massive Values in Self-Attention Modules are the Key to Contextual Knowledge Understanding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jin%2C+M">Mingyu Jin</a>, <a href="/search/cs?searchtype=author&query=Mei%2C+K">Kai Mei</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+W">Wujiang Xu</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+M">Mingjie Sun</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+R">Ruixiang Tang</a>, <a href="/search/cs?searchtype=author&query=Du%2C+M">Mengnan Du</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zirui Liu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yongfeng Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Large language models (LLMs) have achieved remarkable success in contextual knowledge understanding. In this paper, we show that concentrated massive values consistently emerge in specific regions of attention queries (Q) and keys (K), while no such pattern appears in values (V), across various modern transformer-based LLMs (Q, K, and V denote the representations output by the query, key, and value layers, respectively). Through extensive experiments, we further demonstrate that these massive values play a critical role in interpreting contextual knowledge (knowledge obtained from the current context window) rather than in retrieving parametric knowledge stored within the model's parameters. Our investigation of quantization strategies reveals that ignoring these massive values leads to a pronounced drop in performance on tasks requiring rich contextual understanding, aligning with our analysis. Finally, we trace the emergence of concentrated massive values and find that such concentration is caused by Rotary Positional Encoding (RoPE) and that it appears from the first layers onward. These findings shed new light on how Q and K operate in LLMs and offer practical insights for model design and optimization. The code is available at https://github.com/MingyuJ666/Rope_with_LLM. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li>
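<p class="is-size-7">A small diagnostic in the spirit of the analysis above (random tensors stand in for real Q/K/V projection outputs; the metric itself is an illustrative assumption): measure how much of a representation's energy is concentrated in its few largest-magnitude dimensions, which the paper finds to be high for Q and K but not for V.</p>
<pre><code># Energy concentration of the top-k dimensions (illustrative diagnostic).
import torch

def massive_value_ratio(x: torch.Tensor, top_k: int = 4) -> float:
    """x: [tokens, dim]; fraction of total per-dimension energy held by the top_k dimensions."""
    energy = (x ** 2).mean(dim=0)                 # mean squared magnitude per dimension
    return (energy.topk(top_k).values.sum() / energy.sum()).item()

q, k, v = (torch.randn(128, 64) for _ in range(3))
q[:, 0] *= 20                                     # simulate one "massive" Q dimension
print(massive_value_ratio(q), massive_value_ratio(k), massive_value_ratio(v))
</code></pre>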
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.01456">arXiv:2502.01456</a> <span> [<a href="https://arxiv.org/pdf/2502.01456">pdf</a>, <a href="https://arxiv.org/format/2502.01456">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Process Reinforcement through Implicit Rewards </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cui%2C+G">Ganqu Cui</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+L">Lifan Yuan</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zefan Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Hanbin Wang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+W">Wendi Li</a>, <a href="/search/cs?searchtype=author&query=He%2C+B">Bingxiang He</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+Y">Yuchen Fan</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+T">Tianyu Yu</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Q">Qixin Xu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+W">Weize Chen</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+J">Jiarui Yuan</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+H">Huayu Chen</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+K">Kaiyan Zhang</a>, <a href="/search/cs?searchtype=author&query=Lv%2C+X">Xingtai Lv</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shuo Wang</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+Y">Yuan Yao</a>, <a href="/search/cs?searchtype=author&query=Han%2C+X">Xu Han</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+H">Hao Peng</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+Y">Yu Cheng</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zhiyuan Liu</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+M">Maosong Sun</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+B">Bowen Zhou</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+N">Ning Ding</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax">
Dense process rewards have proven a more effective alternative to sparse outcome-level rewards in the inference-time scaling of large language models (LLMs), particularly in tasks requiring complex multi-step reasoning. Dense rewards are also an appealing choice for the reinforcement learning (RL) of LLMs, since their fine-grained signals have the potential to address inherent issues of outcome rewards such as training efficiency and credit assignment; yet this potential remains largely unrealized. This can be primarily attributed to the challenges of training process reward models (PRMs) online: collecting high-quality process labels is prohibitively expensive, making PRMs particularly vulnerable to reward hacking. To address these challenges, we propose PRIME (Process Reinforcement through IMplicit rEwards), which enables online PRM updates using only policy rollouts and outcome labels, through implicit process rewards. PRIME combines well with various advantage functions and forgoes the dedicated reward-model training phase that existing approaches require, substantially reducing development overhead. We demonstrate PRIME's effectiveness on competition-level mathematics and coding. Starting from Qwen2.5-Math-7B-Base, PRIME achieves a 15.1% average improvement across several key reasoning benchmarks over the SFT model. Notably, our resulting model, Eurus-2-7B-PRIME, surpasses Qwen2.5-Math-7B-Instruct on seven reasoning benchmarks with 10% of its training data. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">20 pages. Model&Code&Data available at https://github.com/PRIME-RL/PRIME</span> </p> </li>
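<p class="is-size-7">A hedged sketch of what an implicit process reward can look like (tensor names and the beta value are assumptions; this is not the PRIME codebase): per-token rewards are scaled log-probability ratios between the trained policy and a frozen reference model, so a dense reward signal is obtained without explicit process labels.</p>
<pre><code># Implicit per-token process rewards from a policy/reference log-prob ratio (illustrative).
import torch

def implicit_process_rewards(logp_policy: torch.Tensor,
                             logp_ref: torch.Tensor,
                             beta: float = 0.05) -> torch.Tensor:
    """logp_policy / logp_ref: [T] log-probs of the sampled tokens under the
    trained policy and the frozen reference model."""
    return beta * (logp_policy - logp_ref)        # dense reward, one value per token

rewards = implicit_process_rewards(torch.randn(12), torch.randn(12))
</code></pre>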
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00585">arXiv:2502.00585</a> <span> [<a href="https://arxiv.org/pdf/2502.00585">pdf</a>, <a href="https://arxiv.org/format/2502.00585">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Converting Transformers into DGNNs Form </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jie Zhang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+K">Kuan-Chieh Wang</a>, <a href="/search/cs?searchtype=author&query=Chiu%2C+B">Bo-Wei Chiu</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+M">Min-Te Sun</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Recent advances in deep learning have established Transformer architectures as the predominant modeling paradigm. Central to the success of Transformers is the self-attention mechanism, which scores the similarity between query and key matrices to modulate a value matrix. This operation bears striking similarities to digraph convolution, prompting an investigation into whether digraph convolution could serve as an alternative to self-attention. In this study, we formalize this concept by introducing a synthetic unitary digraph convolution based on the digraph Fourier transform. The resulting model, which we term Converter, effectively converts a Transformer into a Directed Graph Neural Network (DGNN) form. We have tested Converter on the Long-Range Arena benchmark, long-document classification, and DNA sequence-based taxonomy classification. Our experimental results demonstrate that Converter achieves superior performance while maintaining computational efficiency and architectural simplicity, establishing it as a lightweight yet powerful Transformer variant. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">21 pages, 3 figures, and 8 tables; pseudocode improved</span> </p> </li>
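<p class="is-size-7">The analogy the abstract draws can be stated in a few lines (a sketch of standard attention re-read as graph message passing, not the Converter model itself): the softmax attention matrix is a row-stochastic weighted adjacency over tokens, and applying it to V is one step of digraph convolution.</p>
<pre><code># Self-attention read as one digraph-convolution step (illustrative).
import torch

def attention_as_digraph(q, k, v):
    d = q.shape[-1]
    adj = torch.softmax(q @ k.transpose(-2, -1) / d ** 0.5, dim=-1)  # weighted adjacency matrix
    return adj @ v                                                   # propagate along directed edges

n, d = 16, 32
out = attention_as_digraph(torch.randn(n, d), torch.randn(n, d), torch.randn(n, d))
</code></pre>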
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00585v2-abstract-full').style.display = 'none'; document.getElementById('2502.00585v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">21 pages, 3 figures, and 8 tables; pseudocode improved</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00354">arXiv:2502.00354</a> <span> [<a href="https://arxiv.org/pdf/2502.00354">pdf</a>, <a href="https://arxiv.org/format/2502.00354">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3696410.3714561">10.1145/3696410.3714561 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> PM-MOE: Mixture of Experts on Private Model Parameters for Personalized Federated Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Feng%2C+Y">Yu Feng</a>, <a href="/search/cs?searchtype=author&query=Geng%2C+Y">Yangli-ao Geng</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Y">Yifan Zhu</a>, <a href="/search/cs?searchtype=author&query=Han%2C+Z">Zongfu Han</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+X">Xie Yu</a>, <a href="/search/cs?searchtype=author&query=Xue%2C+K">Kaiwen Xue</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Haoran Luo</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+M">Mengyang Sun</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+G">Guangwei Zhang</a>, <a href="/search/cs?searchtype=author&query=Song%2C+M">Meina Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.00354v1-abstract-short" style="display: inline;"> Federated learning (FL) has gained widespread attention for its privacy-preserving and collaborative learning capabilities. Due to significant statistical heterogeneity, traditional FL struggles to generalize a shared model across diverse data domains. 
Federated learning (FL) has gained widespread attention for its privacy-preserving and collaborative learning capabilities. Due to significant statistical heterogeneity, traditional FL struggles to generalize a shared model across diverse data domains. Personalized federated learning addresses this issue by dividing the model into a globally shared part and a locally private part, with the local model correcting representation biases introduced by the global model. Nevertheless, locally converged parameters capture domain-specific knowledge more accurately, and current methods overlook the potential benefits of these parameters. To address these limitations, we propose the PM-MoE architecture, which integrates a mixture of personalized modules with energy-based denoising of personalized modules, enabling each client to select beneficial personalized parameters from other clients. We applied PM-MoE to nine recent model-split-based personalized federated learning algorithms, achieving performance improvements with minimal additional training. Extensive experiments on six widely adopted datasets and two heterogeneity settings validate the effectiveness of our approach. The source code is available at https://github.com/dannis97500/PM-MOE. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li>
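<p class="is-size-7">A toy sketch of mixing private modules with a learned gate (module sizes, the gate design, and class names are illustrative assumptions, not the PM-MoE implementation): each client weighs the personalized heads gathered from other clients and combines their outputs per sample.</p>
<pre><code># Mixture of experts over clients' personalized modules (illustrative).
import torch
import torch.nn as nn

class PrivateMoE(nn.Module):
    def __init__(self, experts, feat_dim):
        super().__init__()
        self.experts = nn.ModuleList(experts)          # private heads from several clients
        self.gate = nn.Linear(feat_dim, len(experts))  # learned per-sample gating

    def forward(self, h):
        w = torch.softmax(self.gate(h), dim=-1)                    # [B, E] gate weights
        outs = torch.stack([e(h) for e in self.experts], dim=1)    # [B, E, out_dim]
        return (w.unsqueeze(-1) * outs).sum(dim=1)

moe = PrivateMoE([nn.Linear(32, 10) for _ in range(4)], feat_dim=32)
y = moe(torch.randn(8, 32))
</code></pre>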
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.18993">arXiv:2501.18993</a> <span> [<a href="https://arxiv.org/pdf/2501.18993">pdf</a>, <a href="https://arxiv.org/format/2501.18993">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Visual Autoregressive Modeling for Image Super-Resolution </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Qu%2C+Y">Yunpeng Qu</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+K">Kun Yuan</a>, <a href="/search/cs?searchtype=author&query=Hao%2C+J">Jinhua Hao</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+K">Kai Zhao</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+Q">Qizhi Xie</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+M">Ming Sun</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+C">Chao Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Image Super-Resolution (ISR) has seen significant progress with the introduction of remarkable generative models. However, challenges such as the trade-off between fidelity and realism, as well as computational complexity, have limited their application. Building upon the tremendous success of autoregressive models in the language domain, we propose VARSR, a novel visual autoregressive modeling framework for ISR in the form of next-scale prediction. To effectively integrate and preserve semantic information in low-resolution images, we propose using prefix tokens to incorporate the condition. Scale-aligned Rotary Positional Encodings are introduced to capture spatial structures, and a diffusion refiner is utilized to model the quantization residual and achieve pixel-level fidelity. Image-based classifier-free guidance is proposed to guide the generation of more realistic images. Furthermore, we collect large-scale data and design a training process to obtain robust generative priors. Quantitative and qualitative results show that VARSR generates high-fidelity, high-realism images more efficiently than diffusion-based methods. Our code will be released at https://github.com/qyp2000/VARSR. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">20 pages; 17 figures</span> </p> </li>
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18993v1-abstract-full').style.display = 'none'; document.getElementById('2501.18993v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">20 pages; 17 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.18913">arXiv:2501.18913</a> <span> [<a href="https://arxiv.org/pdf/2501.18913">pdf</a>, <a href="https://arxiv.org/format/2501.18913">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Rethinking Diffusion Posterior Sampling: From Conditional Score Estimator to Maximizing a Posterior </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xu%2C+T">Tongda Xu</a>, <a href="/search/cs?searchtype=author&query=Cai%2C+X">Xiyan Cai</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xinjie Zhang</a>, <a href="/search/cs?searchtype=author&query=Ge%2C+X">Xingtong Ge</a>, <a href="/search/cs?searchtype=author&query=He%2C+D">Dailan He</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+M">Ming Sun</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jingjing Liu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Ya-Qin Zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Jian Li</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yan Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.18913v1-abstract-short" style="display: inline;"> Recent advancements in diffusion models have been leveraged to address inverse problems without additional training, and Diffusion Posterior Sampling (DPS) (Chung et al., 2022a) is among the most popular approaches. Previous analyses suggest that DPS accomplishes posterior sampling by approximating the conditional score. While in this paper, we demonstrate that the conditional score approximation… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18913v1-abstract-full').style.display = 'inline'; document.getElementById('2501.18913v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.18913v1-abstract-full" style="display: none;"> Recent advancements in diffusion models have been leveraged to address inverse problems without additional training, and Diffusion Posterior Sampling (DPS) (Chung et al., 2022a) is among the most popular approaches. Previous analyses suggest that DPS accomplishes posterior sampling by approximating the conditional score. While in this paper, we demonstrate that the conditional score approximation employed by DPS is not as effective as previously assumed, but rather aligns more closely with the principle of maximizing a posterior (MAP). 
This assertion is substantiated through an examination of DPS on 512x512 ImageNet images, revealing that: 1) DPS's conditional score estimation significantly diverges from the score of a well-trained conditional diffusion model and is even inferior to the unconditional score; 2) the mean of DPS's conditional score estimation deviates significantly from zero, rendering it an invalid score estimate; 3) DPS generates high-quality samples but with significantly lower diversity. In light of these findings, we posit that DPS more closely resembles MAP than a conditional score estimator, and accordingly propose the following enhancements to DPS: 1) we explicitly maximize the posterior through multi-step gradient ascent and projection; 2) we utilize a lightweight conditional score estimator trained with only 100 images and 8 GPU-hours. Extensive experimental results indicate that these proposed improvements significantly enhance DPS's performance. The source code for these improvements is provided at https://github.com/tongdaxu/Rethinking-Diffusion-Posterior-Sampling-From-Conditional-Score-Estimator-to-Maximizing-a-Posterior. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICLR 2025</span> </p> </li>
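<p class="is-size-7">The first proposed enhancement, explicit posterior maximization, can be sketched generically (the Gaussian likelihood and prior below are stand-ins for the diffusion prior and measurement model; this is not the authors' code): run several gradient-ascent steps on log p(y|x) + log p(x) rather than a single guidance step.</p>
<pre><code># Multi-step gradient ascent on a log-posterior (illustrative stand-in models).
import torch

def map_ascent(x0, y, forward_op, log_prior, steps=10, lr=0.1):
    x = x0.clone().requires_grad_(True)
    for _ in range(steps):
        log_lik = -0.5 * ((forward_op(x) - y) ** 2).sum()  # Gaussian measurement model
        (grad,) = torch.autograd.grad(log_lik + log_prior(x), x)
        with torch.no_grad():
            x += lr * grad                                  # ascend the posterior
    return x.detach()

A = torch.randn(5, 8)
x_hat = map_ascent(torch.zeros(8), torch.randn(5),
                   forward_op=lambda x: A @ x,
                   log_prior=lambda x: -0.5 * (x ** 2).sum())
</code></pre>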
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.15478">arXiv:2501.15478</a> <span> [<a href="https://arxiv.org/pdf/2501.15478">pdf</a>, <a href="https://arxiv.org/format/2501.15478">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> LoRAGuard: An Effective Black-box Watermarking Approach for LoRAs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lv%2C+P">Peizhuo Lv</a>, <a href="/search/cs?searchtype=author&query=Xiahou%2C+Y">Yiran Xiahou</a>, <a href="/search/cs?searchtype=author&query=Li%2C+C">Congyi Li</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+M">Mengjie Sun</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Shengzhi Zhang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Kai Chen</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yingjun Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> LoRA (Low-Rank Adaptation) has achieved remarkable success in the parameter-efficient fine-tuning of large models. A trained LoRA matrix can be integrated with the base model through addition or negation operations to improve performance on downstream tasks. However, the unauthorized use of LoRAs to generate harmful content highlights the need for effective mechanisms to trace their usage. A natural solution is to embed watermarks into LoRAs to detect unauthorized misuse. However, existing methods struggle when multiple LoRAs are combined or a negation operation is applied, as these can significantly degrade watermark performance. In this paper, we introduce LoRAGuard, a novel black-box watermarking technique for detecting unauthorized misuse of LoRAs. To support both addition and negation operations, we propose the Yin-Yang watermark technique, where the Yin watermark is verified under the negation operation and the Yang watermark under the addition operation. Additionally, we propose a shadow-model-based watermark training approach that significantly improves effectiveness in scenarios involving multiple integrated LoRAs. Extensive experiments on both language and diffusion models show that LoRAGuard achieves nearly 100% watermark verification success and demonstrates strong effectiveness. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li>
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.15383">arXiv:2501.15383</a> <span> [<a href="https://arxiv.org/pdf/2501.15383">pdf</a>, <a href="https://arxiv.org/format/2501.15383">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Qwen2.5-1M Technical Report </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yang%2C+A">An Yang</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+B">Bowen Yu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+C">Chengyuan Li</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+D">Dayiheng Liu</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+F">Fei Huang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+H">Haoyan Huang</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+J">Jiandong Jiang</a>, <a href="/search/cs?searchtype=author&query=Tu%2C+J">Jianhong Tu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jianwei Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+J">Jingren Zhou</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+J">Junyang Lin</a>, <a href="/search/cs?searchtype=author&query=Dang%2C+K">Kai Dang</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+K">Kexin Yang</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+L">Le Yu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+M">Mei Li</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+M">Minmin Sun</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Q">Qin Zhu</a>, <a href="/search/cs?searchtype=author&query=Men%2C+R">Rui Men</a>, <a href="/search/cs?searchtype=author&query=He%2C+T">Tao He</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+W">Weijia Xu</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+W">Wenbiao Yin</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+W">Wenyuan Yu</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+X">Xiafei Qiu</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+X">Xingzhang Ren</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+X">Xinlong Yang</a> , et al. (3 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> We introduce Qwen2.5-1M, a series of models that extend the context length to 1 million tokens.
Compared to the previous 128K version, the Qwen2.5-1M series has significantly enhanced long-context capabilities through long-context pre-training and post-training. Key techniques such as long data synthesis, progressive pre-training, and multi-stage supervised fine-tuning are employed to effectively enhance long-context performance while reducing training costs. To promote the use of long-context models among a broader user base, we present and open-source our inference framework. This framework includes a length-extrapolation method that can expand model context lengths by at least four times without additional training. To reduce inference costs, we implement a sparse attention method along with chunked prefill optimization for deployment scenarios, and a sparsity refinement method to improve precision. Additionally, we detail our optimizations in the inference engine, including kernel optimization, pipeline parallelism, and scheduling optimization, which significantly enhance overall inference performance. By leveraging our inference framework, the Qwen2.5-1M models achieve a remarkable 3x to 7x prefill speedup in scenarios with 1 million tokens of context. This framework provides an efficient and powerful solution for developing applications that require long-context processing using open-source models. The Qwen2.5-1M series currently includes the open-source models Qwen2.5-7B-Instruct-1M and Qwen2.5-14B-Instruct-1M, as well as the API-accessed model Qwen2.5-Turbo. Evaluations show that Qwen2.5-1M models have been greatly improved on long-context tasks without compromising performance in short-context scenarios. Specifically, the Qwen2.5-14B-Instruct-1M model significantly outperforms GPT-4o-mini on long-context tasks and supports contexts eight times longer. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li>
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.15068">arXiv:2501.15068</a> <span> [<a href="https://arxiv.org/pdf/2501.15068">pdf</a>, <a href="https://arxiv.org/format/2501.15068">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> An Atomic Skill Library Construction Method for Data-Efficient Embodied Manipulation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+D">Dongjiang Li</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+B">Bo Peng</a>, <a href="/search/cs?searchtype=author&query=Li%2C+C">Chang Li</a>, <a href="/search/cs?searchtype=author&query=Qiao%2C+N">Ning Qiao</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Q">Qi Zheng</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+L">Lei Sun</a>, <a href="/search/cs?searchtype=author&query=Qin%2C+Y">Yusen Qin</a>, <a href="/search/cs?searchtype=author&query=Li%2C+B">Bangguo Li</a>, <a href="/search/cs?searchtype=author&query=Luan%2C+Y">Yifeng Luan</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+B">Bo Wu</a>, <a href="/search/cs?searchtype=author&query=Zhan%2C+Y">Yibing Zhan</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+M">Mingang Sun</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+T">Tong Xu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+L">Lusong Li</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+H">Hui Shen</a>, <a href="/search/cs?searchtype=author&query=He%2C+X">Xiaodong He</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Embodied manipulation is a fundamental ability in the realm of embodied artificial intelligence. Although current embodied manipulation models show a certain degree of generalization in specific settings, they struggle in new environments and tasks due to the complexity and diversity of real-world scenarios. The traditional end-to-end data collection and training paradigm leads to significant data demands. Decomposing end-to-end tasks into atomic skills helps reduce data requirements and improves the task success rate. However, existing methods are limited by predefined skill sets that cannot be dynamically updated. To address this issue, we introduce a three-wheeled, data-driven method to build an atomic skill library. We first divide tasks into subtasks using Vision-Language Planning (VLP).
Then, atomic skill definitions are formed by abstracting the subtasks. Finally, an atomic skill library is constructed via data collection and Vision-Language-Action (VLA) fine-tuning. As the atomic skill library expands dynamically under the three-wheel update strategy, the range of tasks it can cover grows naturally. In this way, our method shifts the focus from end-to-end tasks to atomic skills, significantly reducing data costs while maintaining high performance and enabling efficient adaptation to new tasks. Extensive experiments in real-world settings demonstrate the effectiveness and efficiency of our approach. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li>
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.11858">arXiv:2501.11858</a> <span> [<a href="https://arxiv.org/pdf/2501.11858">pdf</a>, <a href="https://arxiv.org/format/2501.11858">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> EmbodiedEval: Evaluate Multimodal LLMs as Embodied Agents </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cheng%2C+Z">Zhili Cheng</a>, <a href="/search/cs?searchtype=author&query=Tu%2C+Y">Yuge Tu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+R">Ran Li</a>, <a href="/search/cs?searchtype=author&query=Dai%2C+S">Shiqi Dai</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+J">Jinyi Hu</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+S">Shengding Hu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Jiahao Li</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+Y">Yang Shi</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+T">Tianyu Yu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+W">Weize Chen</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+L">Lei Shi</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+M">Maosong Sun</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Multimodal Large Language Models (MLLMs) have shown significant advancements, providing a promising future for embodied agents. Existing benchmarks for evaluating MLLMs primarily utilize static images or videos, limiting assessments to non-interactive scenarios. Meanwhile, existing embodied AI benchmarks are task-specific and insufficiently diverse, and so do not adequately evaluate the embodied capabilities of MLLMs. To address this, we propose EmbodiedEval, a comprehensive and interactive evaluation benchmark for MLLMs with embodied tasks. EmbodiedEval features 328 distinct tasks within 125 varied 3D scenes, each of which is rigorously selected and annotated. It covers a broad spectrum of existing embodied AI tasks with significantly enhanced diversity, all within a unified simulation and evaluation framework tailored for MLLMs. The tasks are organized into five categories: navigation, object interaction, social interaction, attribute question answering, and spatial question answering, to assess different capabilities of the agents. We evaluated state-of-the-art MLLMs on EmbodiedEval and found that they fall significantly short of human-level performance on embodied tasks. Our analysis demonstrates the limitations of existing MLLMs in embodied capabilities, providing insights for their future development. We open-source all evaluation data and the simulation framework at https://github.com/thunlp/EmbodiedEval. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li>
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.10560">arXiv:2501.10560</a> <span> [<a href="https://arxiv.org/pdf/2501.10560">pdf</a>, <a href="https://arxiv.org/format/2501.10560">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Databases">cs.DB</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Programming Languages">cs.PL</span> </div> </div> <p class="title is-5 mathjax"> Picachv: Formally Verified Data Use Policy Enforcement for Secure Data Analytics </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+H+H">Haobin Hiroki Chen</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+H">Hongbo Chen</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+M">Mingshen Sun</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+C">Chenghong Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">XiaoFeng Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Ensuring the proper use of sensitive data in analytics under complex privacy policies is an increasingly critical challenge. Many existing approaches lack portability, verifiability, and scalability across diverse data processing frameworks. We introduce Picachv, a novel security monitor that automatically enforces data use policies. It works on relational algebra as an abstraction of program semantics, enabling policy enforcement on the query plans that programs generate during execution. This approach simplifies analysis across diverse analytical operations and supports various front-end query languages. By formalizing both data use policies and relational algebra semantics in Coq, we prove that Picachv correctly enforces policies. Picachv also leverages Trusted Execution Environments (TEEs) to enhance trust at runtime, providing stakeholders with provable guarantees that analytical tasks comply with their data use policies. We integrated Picachv into Polars, a state-of-the-art data analytics framework, and evaluated its performance using the TPC-H benchmark. We also apply our approach to real-world use cases. Our work demonstrates the practical application of formal methods to securing data analytics, addressing key challenges. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li>
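<p class="is-size-7">A toy of the enforcement pattern described above (the plan encoding and policy table are illustrative assumptions; Picachv's actual formalization is in Coq over full relational algebra): walk a query plan and reject any operator that touches a column whose policy forbids its use.</p>
<pre><code># Walking a tiny relational-algebra plan against per-column policies (illustrative).
from dataclasses import dataclass

@dataclass
class Scan:
    table: str

@dataclass
class Project:
    columns: list
    child: object

POLICY = {"ssn": "deny", "age": "allow", "zip": "allow"}  # hypothetical policy table

def check(plan):
    if isinstance(plan, Project):
        for col in plan.columns:
            if POLICY.get(col) == "deny":
                raise PermissionError(f"column {col!r} violates its data use policy")
        check(plan.child)

check(Project(columns=["age", "zip"], child=Scan("patients")))  # passes
</code></pre>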
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.10560v1-abstract-full').style.display = 'none'; document.getElementById('2501.10560v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.10182">arXiv:2501.10182</a> <span> [<a href="https://arxiv.org/pdf/2501.10182">pdf</a>, <a href="https://arxiv.org/format/2501.10182">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Secure Semantic Communication With Homomorphic Encryption </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Meng%2C+R">Rui Meng</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+D">Dayu Fan</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+H">Haixiao Gao</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+Y">Yifan Yuan</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+B">Bizhu Wang</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+X">Xiaodong Xu</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+M">Mengying Sun</a>, <a href="/search/cs?searchtype=author&query=Dong%2C+C">Chen Dong</a>, <a href="/search/cs?searchtype=author&query=Tao%2C+X">Xiaofeng Tao</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+P">Ping Zhang</a>, <a href="/search/cs?searchtype=author&query=Niyato%2C+D">Dusit Niyato</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.10182v1-abstract-short" style="display: inline;"> In recent years, Semantic Communication (SemCom), which aims to achieve efficient and reliable transmission of meaning between agents, has garnered significant attention from both academia and industry. To ensure the security of communication systems, encryption techniques are employed to safeguard confidentiality and integrity. However, traditional cryptography-based encryption algorithms encount… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.10182v1-abstract-full').style.display = 'inline'; document.getElementById('2501.10182v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.10182v1-abstract-full" style="display: none;"> In recent years, Semantic Communication (SemCom), which aims to achieve efficient and reliable transmission of meaning between agents, has garnered significant attention from both academia and industry. To ensure the security of communication systems, encryption techniques are employed to safeguard confidentiality and integrity. However, traditional cryptography-based encryption algorithms encounter obstacles when applied to SemCom. Motivated by this, this paper explores the feasibility of applying homomorphic encryption to SemCom. 
Initially, we review the encryption algorithms utilized in mobile communication systems and analyze the challenges associated with their application to SemCom. Subsequently, we employ the scale-invariant feature transform (SIFT) to demonstrate that semantic features can be preserved in homomorphically encrypted ciphertext. Based on this finding, we propose a task-oriented SemCom scheme secured through homomorphic encryption. We design a privacy-preserving deep joint source-channel coding (JSCC) encoder and decoder whose key-update frequency can be adjusted according to service requirements without compromising transmission performance. Simulation results validate that the proposed scheme achieves almost the same classification accuracy on homomorphically encrypted images as on plaintext images. Furthermore, we outline potential future research directions for homomorphically encrypted SemCom.
Submitted 17 January, 2025; originally announced January 2025.
Comments: 8 pages, 3 figures
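The additive homomorphism such a scheme relies on can be demonstrated with an off-the-shelf Paillier implementation. The sketch below uses the python-paillier package (`pip install phe`) to evaluate a linear function over encrypted values without decrypting them; the "feature" vector and weights are made up for illustration, and the paper's actual pipeline operates on JSCC-encoded image features.

```python
# Minimal additive-homomorphic demo with python-paillier (pip install phe).
# The "semantic feature" vector and linear "task head" are invented for
# illustration; they are not the paper's encoder outputs.
from phe import paillier

public_key, private_key = paillier.generate_paillier_keypair(n_length=1024)

features = [0.12, -0.53, 0.98, 0.07]   # plaintext semantic features
weights = [0.4, 0.1, 0.3, 0.2]         # weights of a linear task head

enc_features = [public_key.encrypt(x) for x in features]

# Linear operations work directly on ciphertexts:
enc_score = weights[0] * enc_features[0]
for w, c in zip(weights[1:], enc_features[1:]):
    enc_score = enc_score + w * c

score = private_key.decrypt(enc_score)
plain_score = sum(w * x for w, x in zip(weights, features))
print(f"decrypted {score:.6f} vs plaintext {plain_score:.6f}")
```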
arXiv:2501.08862 (https://arxiv.org/abs/2501.08862) [cs.LG, cs.AI, cs.CR]
ARMOR: Shielding Unlearnable Examples against Data Augmentation
Authors: Xueluan Gong, Yuji Wang, Yanjiao Chen, Haocheng Dong, Yiming Li, Mengyuan Sun, Shuaike Li, Qian Wang, Chen Chen
Abstract: Private data, when published online, may be collected by unauthorized parties to train deep neural networks (DNNs). To protect privacy, defensive noises can be added to original samples to degrade their learnability by DNNs. Recently, unlearnable examples have been proposed that minimize the training loss so that the model learns almost nothing. However, raw data are often pre-processed before being used for training, which may restore the private information of protected data. In this paper, we reveal the data privacy violation induced by data augmentation, a commonly used pre-processing technique that improves model generalization; to the best of our knowledge, this is the first analysis of its kind. We demonstrate that data augmentation can significantly raise the accuracy of a model trained on unlearnable examples, from 21.3% to 66.1%. To address this issue, we propose a defense framework, dubbed ARMOR, to protect data privacy from potential breaches caused by data augmentation. To overcome the difficulty of having no access to the model training process, we design a non-local module-assisted surrogate model that better captures the effect of data augmentation. In addition, we design a surrogate augmentation selection strategy that maximizes distribution alignment between augmented and non-augmented samples, to choose the optimal augmentation strategy for each class. We also use a dynamic step size adjustment algorithm to enhance the defensive noise generation process. Extensive experiments are conducted on 4 datasets and 5 data augmentation methods to verify the performance of ARMOR. Comparisons with 6 state-of-the-art defense methods demonstrate that ARMOR preserves the unlearnability of protected private data under data augmentation, reducing the test accuracy of a model trained on augmented protected samples by as much as 60% more than the baselines.
Submitted 15 January, 2025; originally announced January 2025.
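For intuition, the following is a simplified sketch of crafting error-minimizing ("unlearnable") noise with a candidate augmentation inside the optimization loop, the general difficulty ARMOR targets. It omits the paper's non-local surrogate model, class-wise augmentation selection, and dynamic step sizes; the model, data, and hyperparameters are toy stand-ins (requires PyTorch).

```python
# Simplified min-min step for error-minimizing noise, with a toy
# augmentation in the loop. Not ARMOR's actual surrogate machinery.
import torch
import torch.nn as nn

torch.manual_seed(0)
images = torch.rand(16, 3, 32, 32)          # stand-in private images
labels = torch.randint(0, 10, (16,))
delta = torch.zeros_like(images, requires_grad=True)

model = nn.Sequential(nn.Flatten(), nn.Linear(3 * 32 * 32, 10))
loss_fn = nn.CrossEntropyLoss()

def augment(x):
    """Toy augmentation: random horizontal flip per sample."""
    flip = torch.rand(x.size(0)) < 0.5
    x = x.clone()
    x[flip] = torch.flip(x[flip], dims=[3])
    return x

for _ in range(50):
    loss = loss_fn(model(augment((images + delta).clamp(0, 1))), labels)
    (grad,) = torch.autograd.grad(loss, delta)
    with torch.no_grad():                    # descend: make the loss small
        delta -= 0.05 * grad.sign()
        delta.clamp_(-8 / 255, 8 / 255)      # keep the noise imperceptible
print("final training loss on protected data:", float(loss))
```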
arXiv:2501.08665 (https://arxiv.org/abs/2501.08665) [cs.CV]
A Survey on Facial Image Privacy Preservation in Cloud-Based Services
Authors: Chen Chen, Mengyuan Sun, Xueluan Gong, Yanjiao Chen, Qian Wang
Abstract: Facial recognition models are increasingly employed by commercial enterprises, government agencies, and cloud service providers for identity verification, consumer services, and surveillance. These models are often trained using vast amounts of facial data processed and stored in cloud-based platforms, raising significant privacy concerns. Users' facial images may be exploited without their consent, leading to potential data breaches and misuse. This survey presents a comprehensive review of current methods aimed at preserving facial image privacy in cloud-based services. We categorize these methods into two primary approaches: image obfuscation-based protection and adversarial perturbation-based protection. We provide an in-depth analysis of both categories, offering qualitative and quantitative comparisons of their effectiveness. Additionally, we highlight unresolved challenges and propose future research directions to improve privacy preservation in cloud computing environments.
Submitted 15 January, 2025; originally announced January 2025.
arXiv:2501.07988 (https://arxiv.org/abs/2501.07988) [cs.CV, cs.AI]
GAC-Net_Geometric and attention-based Network for Depth Completion
Authors: Kuang Zhu, Xingli Gan, Min Sun
Abstract: Depth completion is a key task in autonomous driving, aiming to complete sparse LiDAR depth measurements into high-quality dense depth maps through image guidance. However, existing methods usually treat depth maps as an additional channel of color images, or directly perform convolution on sparse data, failing to fully exploit the 3D geometric information in depth maps, with especially limited performance at complex boundaries and in sparse areas. To address these issues, this paper proposes a depth completion network combining a channel attention mechanism and 3D global feature perception (CGA-Net). The main innovations include: 1) utilizing PointNet++ to extract global 3D geometric features from sparse depth maps, enhancing the scene perception ability of low-line LiDAR data; 2) designing a channel-attention-based multimodal feature fusion module to efficiently integrate sparse depth, RGB images, and 3D geometric features; 3) combining residual learning with CSPN++ to optimize the depth refinement stage, further improving the completion quality in edge areas and complex scenes. Experiments on the KITTI depth completion dataset show that CGA-Net significantly improves the prediction accuracy of dense depth maps, achieving a new state of the art and demonstrating strong robustness in sparse and complex scenes.
Submitted 14 January, 2025; originally announced January 2025.
Comments: 13 pages, 4 figures, 2 tables
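A channel-attention fusion block of the kind described in innovation 2) can be sketched in a few lines of PyTorch; the squeeze-and-excitation-style gating and the channel counts below are illustrative guesses, not the paper's exact design.

```python
# Illustrative channel-attention fusion of depth, RGB, and geometric
# features. Channel counts and gating layout are assumptions, not the
# paper's exact module.
import torch
import torch.nn as nn

class ChannelAttentionFusion(nn.Module):
    def __init__(self, channels: int, reduction: int = 4):
        super().__init__()
        self.gate = nn.Sequential(             # squeeze-and-excitation gate
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(channels, channels // reduction, 1),
            nn.ReLU(inplace=True),
            nn.Conv2d(channels // reduction, channels, 1),
            nn.Sigmoid(),
        )
        self.mix = nn.Conv2d(channels, channels, 3, padding=1)

    def forward(self, depth_f, rgb_f, geom_f):
        x = torch.cat([depth_f, rgb_f, geom_f], dim=1)
        return self.mix(x * self.gate(x))      # reweight channels, then mix

fuse = ChannelAttentionFusion(channels=48)
d, r, g = (torch.rand(2, 16, 64, 64) for _ in range(3))
print(fuse(d, r, g).shape)  # torch.Size([2, 48, 64, 64])
```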
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.07988v1-abstract-full').style.display = 'none'; document.getElementById('2501.07988v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13pages,4 figures, 2 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.07171">arXiv:2501.07171</a> <span> [<a href="https://arxiv.org/pdf/2501.07171">pdf</a>, <a href="https://arxiv.org/format/2501.07171">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> BIOMEDICA: An Open Biomedical Image-Caption Archive, Dataset, and Vision-Language Models Derived from Scientific Literature </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lozano%2C+A">Alejandro Lozano</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+M+W">Min Woo Sun</a>, <a href="/search/cs?searchtype=author&query=Burgess%2C+J">James Burgess</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+L">Liangyu Chen</a>, <a href="/search/cs?searchtype=author&query=Nirschl%2C+J+J">Jeffrey J Nirschl</a>, <a href="/search/cs?searchtype=author&query=Gu%2C+J">Jeffrey Gu</a>, <a href="/search/cs?searchtype=author&query=Lopez%2C+I">Ivan Lopez</a>, <a href="/search/cs?searchtype=author&query=Aklilu%2C+J">Josiah Aklilu</a>, <a href="/search/cs?searchtype=author&query=Katzer%2C+A+W">Austin Wolfgang Katzer</a>, <a href="/search/cs?searchtype=author&query=Chiu%2C+C">Collin Chiu</a>, <a href="/search/cs?searchtype=author&query=Rau%2C+A">Anita Rau</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xiaohan Wang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yuhui Zhang</a>, <a href="/search/cs?searchtype=author&query=Song%2C+A+S">Alfred Seunghoon Song</a>, <a href="/search/cs?searchtype=author&query=Tibshirani%2C+R">Robert Tibshirani</a>, <a href="/search/cs?searchtype=author&query=Yeung-Levy%2C+S">Serena Yeung-Levy</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.07171v2-abstract-short" style="display: inline;"> The development of vision-language models (VLMs) is driven by large-scale and diverse multimodal datasets. However, progress toward generalist biomedical VLMs is limited by the lack of annotated, publicly accessible datasets across biology and medicine. Existing efforts are restricted to narrow domains, missing the full diversity of biomedical knowledge encoded in scientific literature. 
To address this gap, we introduce BIOMEDICA, a scalable, open-source framework to extract, annotate, and serialize the entirety of the PubMed Central Open Access subset into an easy-to-use, publicly accessible dataset. Our framework produces a comprehensive archive with over 24 million unique image-text pairs from over 6 million articles. Metadata and expert-guided annotations are also provided. We demonstrate the utility and accessibility of our resource by releasing BMCA-CLIP, a suite of CLIP-style models continuously pre-trained on the BIOMEDICA dataset via streaming, eliminating the need to download 27 TB of data locally. On average, our models achieve state-of-the-art performance across 40 tasks, spanning pathology, radiology, ophthalmology, dermatology, surgery, molecular biology, parasitology, and cell biology, excelling in zero-shot classification with a 6.56% average improvement (as high as 29.8% and 17.5% in dermatology and ophthalmology, respectively) and stronger image-text retrieval, all while using 10x less compute. To foster reproducibility and collaboration, we release our codebase and dataset for the broader research community.
Submitted 14 January, 2025; v1 submitted 13 January, 2025; originally announced January 2025.
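Streaming-style pretraining access of the sort BMCA-CLIP uses can be sketched with the Hugging Face `datasets` library; note that the dataset identifier below is a placeholder, not the official BIOMEDICA name.

```python
# Sketch of streaming data access in the spirit of "train without
# downloading 27 TB". The dataset name is a hypothetical placeholder.
from datasets import load_dataset  # pip install datasets

ds = load_dataset("some-org/biomedica-placeholder",  # hypothetical name
                  split="train", streaming=True)

for i, example in enumerate(ds):
    # each record would carry an image and its caption/metadata
    print(example.keys())
    if i == 2:
        break
```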
arXiv:2501.07071 (https://arxiv.org/abs/2501.07071) [cs.AI]
Value Compass Leaderboard: A Platform for Fundamental and Validated Evaluation of LLMs Values
Authors: Jing Yao, Xiaoyuan Yi, Shitong Duan, Jindong Wang, Yuzhuo Bai, Muhua Huang, Peng Zhang, Tun Lu, Zhicheng Dou, Maosong Sun, Xing Xie
Abstract: As Large Language Models (LLMs) achieve remarkable breakthroughs, aligning their values with humans has become imperative for their responsible development and customized applications. However, evaluations of LLMs' values that fulfill three desirable goals are still lacking. (1) Value Clarification: We expect to clarify the underlying values of LLMs precisely and comprehensively, while current evaluations focus narrowly on safety risks such as bias and toxicity. (2) Evaluation Validity: Existing static, open-source benchmarks are prone to data contamination and quickly become obsolete as LLMs evolve. Additionally, these discriminative evaluations uncover LLMs' knowledge about values rather than validly assessing LLMs' behavioral conformity to values. (3) Value Pluralism: The pluralistic nature of human values across individuals and cultures is largely ignored in measuring LLMs' value alignment. To address these challenges, we present the Value Compass Leaderboard, with three correspondingly designed modules.
It (i) grounds the evaluation on motivationally distinct basic values to clarify LLMs' underlying values from a holistic view; (ii) applies a generative, evolving evaluation framework with adaptive test items for evolving LLMs and direct value recognition from behaviors in realistic scenarios; and (iii) proposes a metric that quantifies an LLM's alignment with a specific value as a weighted sum over multiple dimensions, with weights determined by pluralistic values.
Submitted 13 January, 2025; originally announced January 2025.
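One plausible reading of the weighted-sum metric in (iii), written out; the notation is mine, not the paper's.

```latex
% Illustrative reading of the weighted-sum alignment metric:
%   s_i(M): model M's measured conformity to value dimension i
%   w_i:    pluralistically determined weight of dimension i
\[
  \mathrm{Align}(M, v) = \sum_{i=1}^{d} w_i \, s_i(M),
  \qquad w_i \ge 0, \quad \sum_{i=1}^{d} w_i = 1 .
\]
```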
arXiv:2501.06598 (https://arxiv.org/abs/2501.06598) [cs.AI]
ChartCoder: Advancing Multimodal Large Language Model for Chart-to-Code Generation
Authors: Xuanle Zhao, Xianzhen Luo, Qi Shi, Chi Chen, Shuo Wang, Wanxiang Che, Zhiyuan Liu, Maosong Sun
Abstract: Multimodal Large Language Models (MLLMs) have demonstrated remarkable capabilities in chart understanding tasks. However, interpreting charts with textual descriptions often leads to information loss, as it fails to fully capture the dense information embedded in charts. In contrast, parsing charts into code provides lossless representations that can effectively contain all critical details. Although existing open-source MLLMs have achieved success in chart understanding tasks, they still face two major challenges when applied to chart-to-code tasks: (1) low executability and poor restoration of chart details in the generated code, and (2) a lack of large-scale and diverse training data. To address these challenges, we propose ChartCoder, the first dedicated chart-to-code MLLM, which leverages Code LLMs as the language backbone to enhance the executability of the generated code. Furthermore, we introduce Chart2Code-160k, the first large-scale and diverse dataset for chart-to-code generation, and propose the Snippet-of-Thought (SoT) method, which transforms direct chart-to-code generation data into step-by-step generation. Experiments demonstrate that ChartCoder, with only 7B parameters, surpasses existing open-source MLLMs on chart-to-code benchmarks, achieving superior chart restoration and code executability. Our code will be available at https://github.com/thunlp/ChartCoder.
Submitted 11 January, 2025; originally announced January 2025.
Comments: 13 pages, 6 figures
arXiv:2501.05767 (https://arxiv.org/abs/2501.05767) [cs.CL, cs.AI, cs.CV]
Migician: Revealing the Magic of Free-Form Multi-Image Grounding in Multimodal Large Language Models
Authors: You Li, Heyu Huang, Chi Chen, Kaiyu Huang, Chao Huang, Zonghao Guo, Zhiyuan Liu, Jinan Xu, Yuhua Li, Ruixuan Li, Maosong Sun
Abstract: The recent advancement of Multimodal Large Language Models (MLLMs) has significantly improved their fine-grained perception of single images and general comprehension across multiple images. However, existing MLLMs still face challenges in achieving precise grounding in complex multi-image scenarios. To address this, we first explore a Chain-of-Thought (CoT) framework that integrates single-image grounding with multi-image comprehension. While partially effective, it remains unstable and struggles to capture abstract visual information due to its non-end-to-end nature. Therefore, we introduce Migician, the first multi-image grounding model capable of performing free-form and accurate grounding across multiple images. To support this, we present the MGrounding-630k dataset, which comprises data for several multi-image grounding tasks derived from existing datasets, along with newly generated free-form grounding instruction-following data. Furthermore, we propose MIG-Bench, a comprehensive benchmark specifically designed for evaluating multi-image grounding capabilities. Experimental results demonstrate that our model achieves significantly superior multi-image grounding capabilities, outperforming the best existing MLLMs by 24.94% and even surpassing much larger 70B models. Our code, model, dataset, and benchmark are fully open-sourced at https://migician-vg.github.io/.
Submitted 17 February, 2025; v1 submitted 10 January, 2025; originally announced January 2025.
Comments: 21 pages, 8 figures
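The CoT baseline the abstract mentions (comprehend a referent via one image, then ground it in another) can be sketched as a two-step pipeline; both model calls below are hypothetical stubs rather than Migician's interface, included only to show the non-end-to-end structure the paper argues against.

```python
# Stub sketch of a two-step CoT multi-image grounding baseline.
# Both model calls are hypothetical stand-ins, not Migician's API.
from typing import Tuple

Box = Tuple[float, float, float, float]  # x1, y1, x2, y2 (normalized)

def describe_referent(image_a: str, query: str) -> str:
    """Stand-in for a single-image MLLM call that names the target."""
    return f"the object in {image_a} matching '{query}'"

def ground_in_image(image: str, description: str) -> Box:
    """Stand-in for a single-image grounding call."""
    return (0.1, 0.2, 0.4, 0.6)  # dummy box

def cot_multi_image_ground(image_a: str, query: str, image_b: str) -> Box:
    description = describe_referent(image_a, query)   # step 1: comprehend
    return ground_in_image(image_b, description)      # step 2: ground

print(cot_multi_image_ground("img_a.jpg", "the red mug", "img_b.jpg"))
```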
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">21 pages, 8 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.05249">arXiv:2501.05249</a> <span> [<a href="https://arxiv.org/pdf/2501.05249">pdf</a>, <a href="https://arxiv.org/format/2501.05249">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> RAG-WM: An Efficient Black-Box Watermarking Approach for Retrieval-Augmented Generation of Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lv%2C+P">Peizhuo Lv</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+M">Mengjie Sun</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Hao Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xiaofeng Wang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Shengzhi Zhang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yuxuan Chen</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Kai Chen</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+L">Limin Sun</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.05249v1-abstract-short" style="display: inline;"> In recent years, tremendous success has been witnessed in Retrieval-Augmented Generation (RAG), widely used to enhance Large Language Models (LLMs) in domain-specific, knowledge-intensive, and privacy-sensitive tasks. However, attackers may steal those valuable RAGs and deploy or commercialize them, making it essential to detect Intellectual Property (IP) infringement. Most existing ownership prot… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.05249v1-abstract-full').style.display = 'inline'; document.getElementById('2501.05249v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.05249v1-abstract-full" style="display: none;"> In recent years, tremendous success has been witnessed in Retrieval-Augmented Generation (RAG), widely used to enhance Large Language Models (LLMs) in domain-specific, knowledge-intensive, and privacy-sensitive tasks. However, attackers may steal those valuable RAGs and deploy or commercialize them, making it essential to detect Intellectual Property (IP) infringement. Most existing ownership protection solutions, such as watermarks, are designed for relational databases and texts. They cannot be directly applied to RAGs because relational database watermarks require white-box access to detect IP infringement, which is unrealistic for the knowledge base in RAGs. Meanwhile, post-processing by the adversary's deployed LLMs typically destructs text watermark information. To address those problems, we propose a novel black-box "knowledge watermark" approach, named RAG-WM, to detect IP infringement of RAGs. 
RAG-WM uses a multi-LLM interaction framework, comprising a Watermark Generator, a Shadow LLM & RAG, and a Watermark Discriminator, to create watermark texts based on watermark entity-relationship tuples and inject them into the target RAG. We evaluate RAG-WM across three domain-specific and two privacy-sensitive tasks on four benchmark LLMs. Experimental results show that RAG-WM effectively detects stolen RAGs in various deployed LLMs. Furthermore, RAG-WM is robust against paraphrasing, unrelated content removal, knowledge insertion, and knowledge expansion attacks. Lastly, RAG-WM can also evade watermark detection approaches, highlighting its promising application in detecting IP infringement of RAG systems.
Submitted 9 January, 2025; originally announced January 2025.
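A toy version of the knowledge-watermark idea: render secret entity-relationship tuples as texts, inject them into the knowledge base, and later probe a suspect system for them. The tuples, the naive retrieval stub, and the verification rule are all invented for illustration; RAG-WM itself generates and verifies watermark texts with LLMs.

```python
# Toy knowledge-watermark injection and verification. The tuples,
# retrieval stub, and verification rule are invented illustrations.
import random

random.seed(7)
WATERMARK_TUPLES = [("Aldoria", "capital_of", "Veyra"),
                    ("Mount Quill", "taller_than", "Mount Brack")]

def render(tup):
    e1, rel, e2 = tup
    return f"{e1} is {rel.replace('_', ' ')} {e2}."

knowledge_base = [f"Real document number {i}." for i in range(100)]
knowledge_base += [render(t) for t in WATERMARK_TUPLES]  # inject
random.shuffle(knowledge_base)

def suspect_rag_answer(entity: str) -> str:
    """Stand-in for querying the adversary's deployed RAG about an entity."""
    for doc in knowledge_base:       # naive retrieval stub
        if entity in doc:
            return doc
    return "I don't know."

hits = sum(suspect_rag_answer(t[0]) == render(t) for t in WATERMARK_TUPLES)
print(f"watermark verification: {hits}/{len(WATERMARK_TUPLES)} tuples recovered")
```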
arXiv:2501.01844 (https://arxiv.org/abs/2501.01844) [cs.LG]
Learning from Ambiguous Data with Hard Labels
Authors: Zeke Xie, Zheng He, Nan Lu, Lichen Bai, Bao Li, Shuo Yang, Mingming Sun, Ping Li
Abstract: Real-world data often contains intrinsic ambiguity that the common single-hard-label annotation paradigm ignores. Standard training on ambiguous data with these hard labels may produce overly confident models and thus lead to poor generalization. In this paper, we propose a novel framework called Quantized Label Learning (QLL) to alleviate this issue. First, we formulate QLL as learning from (very) ambiguous data with hard labels: ideally, each ambiguous instance would be associated with a ground-truth soft-label distribution describing its probabilistic weight in each class; in practice, however, we can only observe a quantized label, i.e., a hard label sampled (quantized) from the ground-truth soft-label distribution of each instance, which can be seen as a biased approximation of the ground-truth soft label. Second, we propose a Class-wise Positive-Unlabeled (CPU) risk estimator that allows us to train accurate classifiers from only ambiguous data with quantized labels. Third, to simulate ambiguous datasets with quantized labels in the real world, we design a mixing-based ambiguous data generation procedure for empirical evaluation. Experiments demonstrate that our CPU method can significantly improve model generalization performance and outperform the baselines.
Submitted 8 January, 2025; v1 submitted 3 January, 2025; originally announced January 2025.
Comments: 9 pages, 4 figures; accepted by ICASSP 2025
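The mixing-based generation procedure in the third contribution can be sketched directly: blend two samples, treat the mixing weights as the ground-truth soft label, and observe only a hard label sampled from it. The shapes and the Beta mixing rule below are illustrative assumptions, not the paper's exact recipe.

```python
# Sketch of mixing-based ambiguous-data generation with quantized labels.
# Shapes and the Beta(2, 2) mixing rule are illustrative assumptions.
import numpy as np

rng = np.random.default_rng(0)
num_classes = 10

x_i, y_i = rng.random((32, 32, 3)), 3      # two clean source samples
x_j, y_j = rng.random((32, 32, 3)), 7

lam = rng.beta(2.0, 2.0)                   # mixing coefficient
x_mix = lam * x_i + (1 - lam) * x_j        # ambiguous instance

soft = np.zeros(num_classes)               # ground-truth soft label
soft[y_i], soft[y_j] = lam, 1 - lam

hard = rng.choice(num_classes, p=soft)     # observed quantized label
print(f"lambda={lam:.2f}, soft={soft.round(2)}, quantized hard label={hard}")
```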
href="/search/cs?searchtype=author&query=Tao%2C+X">Xiaofeng Tao</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+P">Ping Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.00842v1-abstract-short" style="display: inline;"> Semantic communication (SemCom) is regarded as a promising and revolutionary technology in 6G, aiming to transcend the constraints of ``Shannon's trap" by filtering out redundant information and extracting the core of effective data. Compared to traditional communication paradigms, SemCom offers several notable advantages, such as reducing the burden on data transmission, enhancing network managem… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.00842v1-abstract-full').style.display = 'inline'; document.getElementById('2501.00842v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.00842v1-abstract-full" style="display: none;"> Semantic communication (SemCom) is regarded as a promising and revolutionary technology in 6G, aiming to transcend the constraints of ``Shannon's trap" by filtering out redundant information and extracting the core of effective data. Compared to traditional communication paradigms, SemCom offers several notable advantages, such as reducing the burden on data transmission, enhancing network management efficiency, and optimizing resource allocation. Numerous researchers have extensively explored SemCom from various perspectives, including network architecture, theoretical analysis, potential technologies, and future applications. However, as SemCom continues to evolve, a multitude of security and privacy concerns have arisen, posing threats to the confidentiality, integrity, and availability of SemCom systems. This paper presents a comprehensive survey of the technologies that can be utilized to secure SemCom. Firstly, we elaborate on the entire life cycle of SemCom, which includes the model training, model transfer, and semantic information transmission phases. Then, we identify the security and privacy issues that emerge during these three stages. Furthermore, we summarize the techniques available to mitigate these security and privacy threats, including data cleaning, robust learning, defensive strategies against backdoor attacks, adversarial training, differential privacy, cryptography, blockchain technology, model compression, and physical-layer security. Lastly, this paper outlines future research directions to guide researchers in related fields. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.00842v1-abstract-full').style.display = 'none'; document.getElementById('2501.00842v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">123 pages, 27 figures</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Sun%2C+M&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Sun%2C+M&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Sun%2C+M&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Sun%2C+M&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Sun%2C+M&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Sun%2C+M&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">…</span></li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a 
href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>