Search | arXiv e-print repository

<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;50 of 654 results for author: <span class="mathjax">He, B</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> <div class="content"> <form method="GET" action="/search/" aria-role="search"> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="He, B"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=He%2C+B&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="He, B"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 

1. arXiv:2502.12594 [pdf, other] (cs.CL)
PASER: Post-Training Data Selection for Efficient Pruned Large Language Model Recovery
Authors: Bowei He, Lihao Yin, Hui-Ling Zhen, Xiaokun Zhang, Mingxuan Yuan, Chen Ma
Abstract: Model pruning is an effective approach for compressing large language models. However, this process often leads to significant degradation of model capabilities. While post-training techniques such as instruction tuning are commonly employed to recover model performance, existing methods often overlook the uneven deterioration of model capabilities and incur high computational costs. Moreover, some instruction data irrelevant to model capability recovery may introduce negative effects. To address these challenges, we propose the Post-training dAta Selection method for Efficient pruned large language model Recovery (PASER). PASER aims to identify instructions where model capabilities are most severely compromised within a certain recovery data budget. Our approach first applies manifold learning and spectral clustering to group recovery data in the semantic space, revealing capability-specific instruction sets. We then adaptively allocate the data budget to different clusters based on the degrees of model capability degradation. In each cluster, we prioritize data samples where model performance has declined dramatically. To mitigate potential negative transfer, we also detect and filter out conflicting or irrelevant recovery data. Extensive experiments demonstrate that PASER significantly outperforms conventional baselines, effectively recovering the general capabilities of pruned LLMs while utilizing merely 4%-20% of the original post-training data.
Submitted 18 February, 2025; originally announced February 2025.
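
The cluster-then-budget step this abstract describes is easy to picture in code. Below is a minimal, hypothetical sketch (not the authors' implementation): instructions are grouped by spectral clustering over semantic embeddings, and the recovery budget is split across clusters in proportion to measured capability degradation. The function name and the `embeddings`/`degradation` inputs are illustrative assumptions.

```python
import numpy as np
from sklearn.cluster import SpectralClustering

def allocate_recovery_budget(embeddings, degradation, n_clusters=8, budget=1000):
    # Group recovery data in semantic space into capability-specific clusters.
    labels = SpectralClustering(n_clusters=n_clusters,
                                affinity="nearest_neighbors",
                                random_state=0).fit_predict(embeddings)
    # Allocate the budget proportionally to each cluster's mean degradation
    # (e.g., the pruned model's loss increase on those samples).
    cluster_deg = np.array([degradation[labels == c].mean() for c in range(n_clusters)])
    quotas = np.round(budget * cluster_deg / cluster_deg.sum()).astype(int)
    selected = []
    for c in range(n_clusters):
        idx = np.where(labels == c)[0]
        worst_first = idx[np.argsort(-degradation[idx])]  # most-degraded samples first
        selected.extend(worst_first[:quotas[c]].tolist())
    return selected
```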

2. arXiv:2502.12420 [pdf, other] (cs.CL, cs.AI)
Sens-Merging: Sensitivity-Guided Parameter Balancing for Merging Large Language Models
Authors: Shuqi Liu, Han Wu, Bowei He, Xiongwei Han, Mingxuan Yuan, Linqi Song
Abstract: Recent advances in large language models have led to numerous task-specialized fine-tuned variants, creating a need for efficient model merging techniques that preserve specialized capabilities while avoiding costly retraining. While existing task vector-based merging methods show promise, they typically apply uniform coefficients across all parameters, overlooking varying parameter importance both within and across tasks. We present Sens-Merging, a sensitivity-guided coefficient adjustment method that enhances existing model merging techniques by operating at both task-specific and cross-task levels. Our method analyzes parameter sensitivity within individual tasks and evaluates cross-task transferability to determine optimal merging coefficients. Extensive experiments on Mistral 7B and LLaMA2-7B/13B models demonstrate that Sens-Merging significantly improves performance across general knowledge, mathematical reasoning, and code generation tasks. Notably, when combined with existing merging techniques, our method enables merged models to outperform specialized fine-tuned models, particularly in code generation tasks. Our findings reveal important trade-offs between task-specific and cross-task scalings, providing insights for future model merging strategies.
Submitted 19 February, 2025; v1 submitted 17 February, 2025; originally announced February 2025.
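
As a rough illustration of sensitivity-guided (rather than uniform) merging coefficients, here is a hedged sketch: each task vector (fine-tuned weights minus base weights) is scaled by a normalized per-model sensitivity score before being added back onto the base. The sensitivity scores and all names are assumptions, not the paper's exact formulation.

```python
import torch

def sens_merge(base_sd, finetuned_sds, sensitivities):
    """base_sd: base model state_dict; finetuned_sds: list of fine-tuned
    state_dicts; sensitivities: one importance score per fine-tuned model,
    e.g. from gradients on a calibration set (assumed, not specified here)."""
    coeffs = torch.softmax(torch.tensor(sensitivities, dtype=torch.float32), dim=0)
    merged = {k: v.clone().float() for k, v in base_sd.items()}
    for coeff, sd in zip(coeffs, finetuned_sds):
        for k in merged:
            # Task vector = delta from base, scaled by the model's coefficient.
            merged[k] += coeff * (sd[k].float() - base_sd[k].float())
    return merged
```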

3. arXiv:2502.11573 [pdf, other] (cs.CL, cs.AI)
InfiR: Crafting Effective Small Language Models and Multimodal Small Language Models in Reasoning
Authors: Congkai Xie, Shuo Cai, Wenjun Wang, Pengxiang Li, Zhijie Sang, Kejing Yang, Yiming Zhang, Zhen Li, Guanghao Zhu, Zeyu Liu, Yang Yu, Yuhang Liu, Su Lu, Baoyi He, Qi Zhou, Xiaotian Han, Jianbo Yuan, Shengyu Zhang, Fei Wu, Hongxia Yang
Abstract: Large Language Models (LLMs) and Multimodal Large Language Models (MLLMs) have made significant advancements in reasoning capabilities. However, they still face challenges such as high computational demands and privacy concerns. This paper focuses on developing efficient Small Language Models (SLMs) and Multimodal Small Language Models (MSLMs) that retain competitive reasoning abilities. We introduce a novel training pipeline that enhances reasoning capabilities and facilitates deployment on edge devices, achieving state-of-the-art performance while minimizing development costs. InfiR aims to advance AI systems by improving reasoning, reducing adoption barriers, and addressing privacy concerns through smaller model sizes. Resources are available at https://github.com/Reallm-Labs/InfiR.
Submitted 17 February, 2025; originally announced February 2025.

4. arXiv:2502.10959 [pdf, other] (cs.DB)
Revisiting the Design of In-Memory Dynamic Graph Storage
Authors: Jixian Su, Chiyu Hao, Shixuan Sun, Hao Zhang, Sen Gao, Jiaxin Jiang, Yao Chen, Chenyi Zhang, Bingsheng He, Minyi Guo
Abstract: The effectiveness of in-memory dynamic graph storage (DGS) for supporting concurrent graph read and write queries is crucial for real-time graph analytics and updates. Various methods have been proposed, for example, LLAMA, Aspen, LiveGraph, Teseo, and Sortledton. These approaches differ significantly in their support for read and write operations, space overhead, and concurrency control. However, there has been no systematic study to explore the trade-offs among these dimensions. In this paper, we evaluate the effectiveness of individual techniques and identify the performance factors affecting these storage methods by proposing a common abstraction for DGS design and implementing a generic test framework based on this abstraction. Our findings highlight several key insights: 1) Existing DGS methods exhibit substantial space overhead. For example, Aspen consumes 3.3-10.8x more memory than CSR, while the optimal fine-grained methods consume 4.1-8.9x more memory than CSR, indicating a significant memory overhead. 2) Existing methods often overlook the memory access impact of modern architectures, leading to performance degradation compared to continuous storage methods. 3) Fine-grained concurrency control methods, in particular, suffer from severe efficiency and space issues due to maintaining versions and performing checks for each neighbor. These methods also experience significant contention on high-degree vertices. Our systematic study reveals these performance bottlenecks and outlines future directions to improve DGS for real-time graph analytics.
Submitted 15 February, 2025; originally announced February 2025.
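
CSR, the memory baseline the abstract compares against, stores a graph as one offset array plus one packed neighbor array, which is why dynamic structures that add per-edge or per-version bookkeeping land at several times its footprint. A small illustrative construction:

```python
import numpy as np

def build_csr(num_vertices, edges):
    """edges: iterable of (src, dst) pairs; returns (offsets, neighbors)."""
    counts = np.zeros(num_vertices + 1, dtype=np.int64)
    for s, _ in edges:
        counts[s + 1] += 1
    offsets = np.cumsum(counts)          # neighbors of v live in offsets[v]:offsets[v+1]
    neighbors = np.empty(offsets[-1], dtype=np.int64)
    cursor = offsets[:-1].copy()
    for s, d in edges:
        neighbors[cursor[s]] = d
        cursor[s] += 1
    return offsets, neighbors

offsets, neighbors = build_csr(4, [(0, 1), (0, 2), (2, 3)])
print(neighbors[offsets[0]:offsets[1]])   # neighbors of vertex 0 -> [1 2]
```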

5. arXiv:2502.10743 [pdf, other] (cs.CL)
1bit-Merging: Dynamic Quantized Merging for Large Language Models
Authors: Shuqi Liu, Han Wu, Bowei He, Zehua Liu, Xiongwei Han, Mingxuan Yuan, Linqi Song
Abstract: Recent advances in large language models have led to specialized models excelling in specific domains, creating a need for efficient model merging techniques. While traditional merging approaches combine parameters into a single static model, they often compromise task-specific performance. Task-specific routing methods, by contrast, maintain accuracy but introduce substantial storage overhead. We present 1bit-Merging, a novel framework that integrates task-specific routing with 1-bit quantized task vectors to balance performance and storage efficiency. Our approach leverages the observation that different task-specific models store knowledge in distinct layers (chat models primarily in attention layers, math/code models in MLP layers), enabling targeted compression strategies. Through extensive experiments with LLaMA2 and Mistral model families across chat, mathematical reasoning, and code generation tasks, we demonstrate that 1bit-Merging achieves comparable or superior performance to existing methods while significantly reducing storage requirements. Our framework offers a practical solution for combining specialized models while maintaining their individual strengths and addressing the storage challenges of current approaches.
Submitted 15 February, 2025; originally announced February 2025.
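
The 1-bit task-vector idea in this abstract can be sketched as sign-plus-scale quantization: store only the sign of each weight delta and one floating-point scale per tensor. This is a generic illustration under that reading, not the paper's exact scheme.

```python
import torch

def quantize_task_vector_1bit(task_vector):
    """task_vector: dict of (finetuned - base) weight deltas."""
    packed = {}
    for name, delta in task_vector.items():
        scale = delta.abs().mean()                       # one scalar per tensor
        packed[name] = (torch.sign(delta).to(torch.int8), scale)
    return packed

def dequantize(packed):
    # Reconstruct an approximate task vector to add back onto the base model.
    return {name: sign.float() * scale for name, (sign, scale) in packed.items()}
```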

6. arXiv:2502.10735 [pdf, other] (cs.CL)
OPTISHEAR: Towards Efficient and Adaptive Pruning of Large Language Models via Evolutionary Optimization
Authors: Shuqi Liu, Bowei He, Han Wu, Linqi Song
Abstract: Post-training pruning has emerged as a crucial optimization technique as large language models (LLMs) continue to grow rapidly. However, the significant variations in weight distributions across different LLMs make fixed pruning strategies inadequate for multiple models. In this paper, we introduce OptiShear, an efficient evolutionary optimization framework for adaptive LLM pruning. Our framework features two key innovations: an effective search space built on our Meta pruning metric to handle diverse weight distributions, and a model-wise reconstruction error for rapid evaluation during search trials. We employ Non-dominated Sorting Genetic Algorithm III (NSGA-III) to optimize both pruning metrics and layerwise sparsity ratios. Through extensive evaluation on LLaMA-1/2/3 and Mistral models (7B-70B) across multiple benchmarks, we demonstrate that our adaptive pruning metrics consistently outperform existing methods. Additionally, our discovered layerwise sparsity ratios enhance the effectiveness of other pruning metrics. The framework exhibits strong cross-task and cross-model generalizability, providing a cost-effective solution for model compression.
Submitted 15 February, 2025; originally announced February 2025.
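
The quantities being searched over (a pruning metric and per-layer sparsity ratios) plug into a step like the following. The metric shown, weight magnitude times input-activation norm, is a common heuristic used here as a stand-in for the paper's searched Meta metric; all names are illustrative.

```python
import torch

def prune_layer(weight, act_norm, sparsity):
    """weight: (out_features, in_features); act_norm: per-input-channel
    activation norms from calibration data; sparsity: ratio found by search."""
    k = int(weight.numel() * sparsity)
    if k == 0:
        return weight
    score = weight.abs() * act_norm.unsqueeze(0)      # elementwise importance
    threshold = score.flatten().kthvalue(k).values    # k-th smallest score
    return weight * (score > threshold)               # zero the least important
```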

7. arXiv:2502.10125 [pdf, other] (cs.LG, cs.AI)
Learning Relational Tabular Data without Shared Features
Authors: Zhaomin Wu, Shida Wang, Ziyang Wang, Bingsheng He
Abstract: Learning relational tabular data has gained significant attention recently, but most studies focus on single tables, overlooking the potential of cross-table learning. Cross-table learning, especially in scenarios where tables lack shared features and pre-aligned data, offers vast opportunities but also introduces substantial challenges. The alignment space is immense, and determining accurate alignments between tables is highly complex. We propose Latent Entity Alignment Learning (Leal), a novel framework enabling effective cross-table training without requiring shared features or pre-aligned data. Leal operates on the principle that properly aligned data yield lower loss than misaligned data, a concept embodied in its soft alignment mechanism. This mechanism is coupled with a differentiable cluster sampler module, ensuring efficient scaling to large relational tables. Furthermore, we provide a theoretical proof of the cluster sampler's approximation capacity. Extensive experiments on five real-world and five synthetic datasets show that Leal achieves up to a 26.8% improvement in predictive performance compared to state-of-the-art methods, demonstrating its effectiveness and scalability.
Submitted 14 February, 2025; originally announced February 2025.
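
The soft-alignment principle (properly aligned rows should produce lower loss) suggests a differentiable matching layer along these lines. This is a speculative sketch, with all dimensions and names assumed:

```python
import torch
import torch.nn.functional as F

def soft_align(a_rows, b_rows, temperature=0.1):
    """a_rows: (n, d) learned embeddings of table A's rows; b_rows: (m, d)
    embeddings of table B's rows. No shared features are required: the
    match scores are trained end-to-end because better alignments lower
    the downstream loss."""
    scores = a_rows @ b_rows.T / temperature   # (n, m) pairwise affinities
    weights = F.softmax(scores, dim=1)         # differentiable soft alignment
    return weights @ b_rows                    # expected aligned B-row per A-row
```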

8. arXiv:2502.08974 [pdf, other] (cs.CV)
Topo2Seq: Enhanced Topology Reasoning via Topology Sequence Learning
Authors: Yiming Yang, Yueru Luo, Bingkun He, Erlong Li, Zhipeng Cao, Chao Zheng, Shuqi Mei, Zhen Li
Abstract: Extracting lane topology from perspective views (PV) is crucial for planning and control in autonomous driving. This approach extracts potential drivable trajectories for self-driving vehicles without relying on high-definition (HD) maps. However, the unordered nature and weak long-range perception of the DETR-like framework can result in misaligned segment endpoints and limited topological prediction capabilities. Inspired by the learning of contextual relationships in language models, the connectivity relations in roads can be characterized as explicit topology sequences. In this paper, we introduce Topo2Seq, a novel approach for enhancing topology reasoning via topology sequence learning. The core concept of Topo2Seq is randomized-order prompt-to-sequence learning between the lane segment decoder and the topology sequence decoder. The dual decoder branches simultaneously learn the lane topology sequences extracted from the Directed Acyclic Graph (DAG) and the lane graph containing geometric information. Randomized-order prompt-to-sequence learning extracts unordered key points from the lane graph predicted by the lane segment decoder, which are then fed into the prompt design of the topology sequence decoder to reconstruct an ordered and complete lane graph. In this way, the lane segment decoder learns powerful long-range perception and accurate topological reasoning from the topology sequence decoder. Notably, the topology sequence decoder is only introduced during training and does not affect inference efficiency. Experimental evaluations on the OpenLane-V2 dataset demonstrate the state-of-the-art performance of Topo2Seq in topology reasoning.
Submitted 13 February, 2025; originally announced February 2025.
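
A topology sequence extracted from a lane DAG can be as simple as a serialization in topological order. The sketch below (Kahn's algorithm) shows one plausible way to linearize such a graph for a sequence decoder; it is an assumption rather than the paper's tokenization.

```python
from collections import deque

def topology_sequence(num_segments, edges):
    """edges: (from_segment, to_segment) connectivity pairs of a lane DAG."""
    adj = {v: [] for v in range(num_segments)}
    indegree = [0] * num_segments
    for s, d in edges:
        adj[s].append(d)
        indegree[d] += 1
    ready = deque(v for v in range(num_segments) if indegree[v] == 0)
    sequence = []
    while ready:
        v = ready.popleft()
        sequence.append(v)                 # emit segment id as the next token
        for d in adj[v]:
            indegree[d] -= 1
            if indegree[d] == 0:
                ready.append(d)
    return sequence
```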

9. arXiv:2502.08685 [pdf, other] (cs.LG, cs.AI)
Beyond Models! Explainable Data Valuation and Metric Adaption for Recommendation
Authors: Renqi Jia, Xiaokun Zhang, Bowei He, Qiannan Zhu, Weitao Xu, Jiehao Chen, Chen Ma
Abstract: User behavior records serve as the foundation for recommender systems. While behavior data is easy to acquire, it often suffers from varying quality. Current methods employ data valuation to discern high-quality data from low-quality data, but they tend to employ black-box designs, lacking transparency and interpretability. Besides, they are typically tailored to specific evaluation metrics, leading to limited generality across various tasks. To overcome these issues, we propose DVR, an explainable and versatile framework that can enhance the efficiency of data utilization tailored to any requirements of the model architectures and evaluation metrics. For explainable data valuation, a data valuator is presented to evaluate data quality by calculating its Shapley value from a game-theoretic perspective, ensuring robust mathematical properties and reliability. To accommodate various evaluation metrics, including differentiable and non-differentiable ones, a metric adapter is devised based on reinforcement learning, where a metric is treated as the reinforcement reward that guides model optimization. Extensive experiments conducted on various benchmarks verify that our framework can improve the performance of current recommendation algorithms on various metrics including ranking accuracy, diversity, and fairness. Specifically, our framework achieves up to 34.7% improvements over existing methods in terms of the representative NDCG metric. The code is available at https://github.com/renqii/DVR.
Submitted 12 February, 2025; originally announced February 2025.
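
The Shapley value underlying the data valuator is standard game theory; exact computation is exponential in the number of samples, so a Monte Carlo permutation estimate like the following is the usual route. `utility` (the metric of a model trained on a given subset) is a placeholder the reader supplies:

```python
import random

def shapley_values(samples, utility, n_permutations=200):
    """Estimate each sample's average marginal contribution to `utility`."""
    phi = {s: 0.0 for s in samples}
    for _ in range(n_permutations):
        order = random.sample(samples, len(samples))   # random permutation
        coalition, prev = [], utility([])
        for s in order:
            coalition.append(s)
            current = utility(coalition)
            phi[s] += (current - prev) / n_permutations
            prev = current
    return phi
```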

10. arXiv:2502.08160 [pdf, other] (cs.LG, cs.AI)
Vertical Federated Learning in Practice: The Good, the Bad, and the Ugly
Authors: Zhaomin Wu, Zhen Qin, Junyi Hou, Haodong Zhao, Qinbin Li, Bingsheng He, Lixin Fan
Abstract: Vertical Federated Learning (VFL) is a privacy-preserving collaborative learning paradigm that enables multiple parties with distinct feature sets to jointly train machine learning models without sharing their raw data. Despite its potential to facilitate cross-organizational collaborations, the deployment of VFL systems in real-world applications remains limited. To investigate the gap between existing VFL research and practical deployment, this survey analyzes the real-world data distributions in potential VFL applications and identifies four key findings that highlight this gap. We propose a novel data-oriented taxonomy of VFL algorithms based on real VFL data distributions. Our comprehensive review of existing VFL algorithms reveals that some common practical VFL scenarios have few or no viable solutions. Based on these observations, we outline key research directions aimed at bridging the gap between current VFL research and real-world applications.
Submitted 12 February, 2025; originally announced February 2025.

11. arXiv:2502.07288 [pdf, other] (cs.CV, cs.AI)
KPIs 2024 Challenge: Advancing Glomerular Segmentation from Patch- to Slide-Level
Authors: Ruining Deng, Tianyuan Yao, Yucheng Tang, Junlin Guo, Siqi Lu, Juming Xiong, Lining Yu, Quan Huu Cap, Pengzhou Cai, Libin Lan, Ze Zhao, Adrian Galdran, Amit Kumar, Gunjan Deotale, Dev Kumar Das, Inyoung Paik, Joonho Lee, Geongyu Lee, Yujia Chen, Wangkai Li, Zhaoyang Li, Xuege Hou, Zeyuan Wu, Shengjin Wang, Maximilian Fischer, et al. (22 additional authors not shown)
Abstract: Chronic kidney disease (CKD) is a major global health issue, affecting over 10% of the population and causing significant mortality. While kidney biopsy remains the gold standard for CKD diagnosis and treatment, the lack of comprehensive benchmarks for kidney pathology segmentation hinders progress in the field. To address this, we organized the Kidney Pathology Image Segmentation (KPIs) Challenge, introducing a dataset that incorporates preclinical rodent models of CKD with over 10,000 annotated glomeruli from 60+ Periodic Acid Schiff (PAS)-stained whole slide images. The challenge includes two tasks, patch-level segmentation and whole slide image segmentation and detection, evaluated using the Dice Similarity Coefficient (DSC) and F1-score. By encouraging innovative segmentation methods that adapt to diverse CKD models and tissue conditions, the KPIs Challenge aims to advance kidney pathology analysis, establish new benchmarks, and enable precise, large-scale quantification for disease research and diagnosis.
Submitted 11 February, 2025; originally announced February 2025.
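
The Dice Similarity Coefficient used for evaluation is DSC = 2|A ∩ B| / (|A| + |B|) over binary masks; a reference implementation:

```python
import numpy as np

def dice_coefficient(pred, target, eps=1e-7):
    """pred, target: binary segmentation masks of equal shape."""
    pred = pred.astype(bool)
    target = target.astype(bool)
    intersection = np.logical_and(pred, target).sum()
    return (2.0 * intersection + eps) / (pred.sum() + target.sum() + eps)
```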
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.06892">arXiv:2502.06892</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.06892">pdf</a>, <a href="https://arxiv.org/format/2502.06892">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Certifying Language Model Robustness with Fuzzed Randomized Smoothing: An Efficient Defense Against Backdoor Attacks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=He%2C+B">Bowei He</a>, <a href="/search/?searchtype=author&amp;query=Yin%2C+L">Lihao Yin</a>, <a href="/search/?searchtype=author&amp;query=Zhen%2C+H">Hui-Ling Zhen</a>, <a href="/search/?searchtype=author&amp;query=Zhang%2C+J">Jianping Zhang</a>, <a href="/search/?searchtype=author&amp;query=Hong%2C+L">Lanqing Hong</a>, <a href="/search/?searchtype=author&amp;query=Yuan%2C+M">Mingxuan Yuan</a>, <a href="/search/?searchtype=author&amp;query=Ma%2C+C">Chen Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.06892v1-abstract-short" style="display: inline;"> The widespread deployment of pre-trained language models (PLMs) has exposed them to textual backdoor attacks, particularly those planted during the pre-training stage. These attacks pose significant risks to high-reliability applications, as they can stealthily affect multiple downstream tasks. While certifying robustness against such threats is crucial, existing defenses struggle with the high-di&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06892v1-abstract-full').style.display = 'inline'; document.getElementById('2502.06892v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.06892v1-abstract-full" style="display: none;"> The widespread deployment of pre-trained language models (PLMs) has exposed them to textual backdoor attacks, particularly those planted during the pre-training stage. These attacks pose significant risks to high-reliability applications, as they can stealthily affect multiple downstream tasks. While certifying robustness against such threats is crucial, existing defenses struggle with the high-dimensional, interdependent nature of textual data and the lack of access to original poisoned pre-training data. To address these challenges, we introduce \textbf{F}uzzed \textbf{R}andomized \textbf{S}moothing (\textbf{FRS}), a novel approach for efficiently certifying language model robustness against backdoor attacks. FRS integrates software robustness certification techniques with biphased model parameter smoothing, employing Monte Carlo tree search for proactive fuzzing to identify vulnerable textual segments within the Damerau-Levenshtein space. This allows for targeted and efficient text randomization, while eliminating the need for access to poisoned training data during model smoothing. Our theoretical analysis demonstrates that FRS achieves a broader certified robustness radius compared to existing methods. 
arXiv:2502.05413 [pdf, other] https://arxiv.org/abs/2502.05413
Subjects: Operating Systems (cs.OS)
Title: XPUTimer: Anomaly Diagnostics for Divergent LLM Training in GPU Clusters of Thousand-Plus Scale
Authors: Weihao Cui, Ji Zhang, Han Zhao, Chao Liu, Wenhao Zhang, Jian Sha, Quan Chen, Bingsheng He, Minyi Guo
Abstract: The rapid proliferation of large language models has driven the need for efficient GPU training clusters. However, ensuring high-performance training in these clusters is challenging due to the complexity of software-hardware interactions and the frequent occurrence of training anomalies. Since existing diagnostic tools are narrowly tailored to specific issues, there are gaps in their ability to address anomalies spanning the entire training stack. In response, we introduce XPUTimer, a real-time diagnostic framework designed for distributed LLM training at scale. XPUTimer first integrates a lightweight tracing daemon to monitor key code segments with minimal overhead. Additionally, it features a diagnostic engine that employs novel intra-kernel tracing and holistic aggregated metrics to efficiently identify and resolve anomalies. Deployment of XPUTimer across 6,000 GPUs over eight months demonstrated significant improvements across the training stack, validating its effectiveness in real-world scenarios.
Submitted 7 February, 2025; originally announced February 2025.
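Editorial note: a "lightweight tracing daemon monitoring key code segments" can be approximated in a few lines with a timing context manager plus an outlier check. This is a hypothetical sketch, not XPUTimer's implementation; `_SEGMENT_STATS`, `trace_segment`, and `anomaly_candidates` are invented names.

```python
import time
from collections import defaultdict
from contextlib import contextmanager

# Hypothetical in-process trace store; a real daemon would ship records
# off the critical path (e.g., via shared memory or a socket).
_SEGMENT_STATS = defaultdict(list)

@contextmanager
def trace_segment(name: str):
    """Record the wall-clock duration of a key code segment (nanoseconds)."""
    start = time.perf_counter_ns()
    try:
        yield
    finally:
        _SEGMENT_STATS[name].append(time.perf_counter_ns() - start)

def anomaly_candidates(threshold: float = 3.0):
    """Flag segments whose latest duration exceeds `threshold`x their mean."""
    flagged = []
    for name, samples in _SEGMENT_STATS.items():
        mean = sum(samples) / len(samples)
        if len(samples) > 10 and samples[-1] > threshold * mean:
            flagged.append(name)
    return flagged

with trace_segment("allreduce"):
    time.sleep(0.01)  # stand-in for a collective communication call
print(_SEGMENT_STATS["allreduce"])
```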
arXiv:2502.03845 [pdf, other] https://arxiv.org/abs/2502.03845
Subjects: Multiagent Systems (cs.MA)
Title: PAGNet: Pluggable Adaptive Generative Networks for Information Completion in Multi-Agent Communication
Authors: Zhuohui Zhang, Bin Cheng, Zhipeng Wang, Yanmin Zhou, Gang Li, Ping Lu, Bin He, Jie Chen
Abstract: For partially observable cooperative tasks, multi-agent systems must develop effective communication and understand the interplay among agents in order to achieve cooperative goals. However, existing multi-agent reinforcement learning (MARL) with communication methods lack evaluation metrics for information weights and information-level communication modeling. This causes agents to neglect the aggregation of multiple messages, thereby significantly reducing policy learning efficiency. In this paper, we propose pluggable adaptive generative networks (PAGNet), a novel framework that integrates generative models into MARL to enhance communication and decision-making. PAGNet enables agents to synthesize global state representations from weighted local observations and to use these representations, alongside learned communication weights, for coordinated decision-making. This pluggable approach reduces the computational demands typically associated with the joint training of communication and policy networks. Extensive experimental evaluations across diverse benchmarks and communication scenarios demonstrate the significant performance improvements achieved by PAGNet. Furthermore, we analyze the emergent communication patterns and the quality of generated global states, providing insights into its operational mechanisms.
Submitted 6 February, 2025; originally announced February 2025.
Comments: 14 pages, 9 figures
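Editorial note: the interface of "a global state synthesized from weighted local observations" might look like the following minimal sketch. The softmax weighting, shapes, and function name are assumptions; PAGNet itself learns a generative model rather than computing a fixed weighted average.

```python
import numpy as np

def synthesize_global_state(local_obs: np.ndarray, weights: np.ndarray) -> np.ndarray:
    """Fuse per-agent observations (n_agents, obs_dim) into one global vector
    using per-agent communication weights (n_agents,)."""
    w = np.exp(weights - weights.max())
    w /= w.sum()                                  # softmax over agents
    return (w[:, None] * local_obs).sum(axis=0)

obs = np.random.randn(4, 8)                      # 4 agents, 8-dim observations
weights = np.array([0.1, 2.0, 0.5, -1.0])        # learned in the real system
print(synthesize_global_state(obs, weights).shape)  # (8,)
```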
arXiv:2502.03033 [pdf, other] https://arxiv.org/abs/2502.03033
Subjects: Machine Learning (cs.LG)
Title: Aggregate to Adapt: Node-Centric Aggregation for Multi-Source-Free Graph Domain Adaptation
Authors: Zhen Zhang, Bingsheng He
Abstract: Unsupervised graph domain adaptation (UGDA) focuses on transferring knowledge from a labeled source graph to an unlabeled target graph under domain discrepancies. Most existing UGDA methods are designed to adapt information from a single source domain, which cannot effectively exploit the complementary knowledge from multiple source domains. Furthermore, their assumption that the labeled source graphs are accessible throughout the training procedure might not be practical due to privacy, regulation, and storage concerns. In this paper, we investigate multi-source-free unsupervised graph domain adaptation, i.e., adapting knowledge from multiple source domains to an unlabeled target domain without utilizing labeled source graphs, relying solely on source pre-trained models. Unlike previous multi-source domain adaptation approaches that aggregate predictions at the model level, we introduce a novel model named GraphATA which conducts adaptation at node granularity. Specifically, we parameterize each node with its own graph convolutional matrix by automatically aggregating weight matrices from multiple source models according to its local context, thus realizing dynamic adaptation over graph-structured data. We also demonstrate the capability of GraphATA to generalize to both model-centric and layer-centric methods. Comprehensive experiments on various public datasets show that our GraphATA can consistently surpass recent state-of-the-art baselines with different gains.
Submitted 5 February, 2025; originally announced February 2025.
Comments: Accepted by WWW-2025
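Editorial note: "parameterize each node with its own convolutional matrix by aggregating source-model weight matrices according to its local context" can be sketched as a per-node attention over source weights. The attention-style scoring via `keys` is an assumed stand-in for however GraphATA actually scores local context.

```python
import numpy as np

def node_specific_weights(source_Ws, context: np.ndarray, keys: np.ndarray) -> np.ndarray:
    """Aggregate pre-trained source weight matrices into one matrix for a node.

    source_Ws : list of (d_in, d_out) matrices, one per source model
    context   : (d_in,) local context embedding of the node
    keys      : (n_sources, d_in) scoring vectors (assumed learnable)
    """
    scores = keys @ context                      # one score per source model
    alphas = np.exp(scores - scores.max())
    alphas /= alphas.sum()                       # softmax attention over sources
    return sum(a * W for a, W in zip(alphas, source_Ws))

Ws = [np.random.randn(8, 4) for _ in range(3)]   # 3 source models
ctx = np.random.randn(8)
keys = np.random.randn(3, 8)
print(node_specific_weights(Ws, ctx, keys).shape)  # (8, 4)
```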
arXiv:2502.02458 [pdf, other] https://arxiv.org/abs/2502.02458
Subjects: Computation and Language (cs.CL); Computer Vision and Pattern Recognition (cs.CV)
Title: SAISA: Towards Multimodal Large Language Models with Both Training and Inference Efficiency
Authors: Qianhao Yuan, Yanjiang Liu, Yaojie Lu, Hongyu Lin, Ben He, Xianpei Han, Le Sun
Abstract: Multimodal Large Language Models (MLLMs) mainly fall into two architectures, each involving a trade-off between training and inference efficiency: embedding space alignment (e.g., LLaVA-1.5) is inefficient during inference, while cross-attention space alignment (e.g., Flamingo) is inefficient in training. In this paper, we compare these two architectures and identify the key factors for building efficient MLLMs. A primary difference between them lies in how attention is applied to visual tokens, particularly in their interactions with each other. To investigate whether attention among visual tokens is necessary, we propose a new self-attention mechanism, NAAViT (No Attention Among Visual Tokens), which eliminates this type of attention. Our pilot experiment on LLaVA-1.5 shows that attention among visual tokens is highly redundant. Based on these insights, we introduce SAISA (Self-Attention Input Space Alignment), a novel architecture that enhances both training and inference efficiency. SAISA directly aligns visual features with the input spaces of NAAViT self-attention blocks, reducing computational overhead in both self-attention blocks and feed-forward networks (FFNs). Using the same configuration as LLaVA-1.5, SAISA reduces inference FLOPs by 66% and training budget by 26%, while achieving superior performance in terms of accuracy. Comprehensive ablation studies further validate the effectiveness of SAISA across various LLMs and visual encoders. The code and model will be publicly available at https://github.com/icip-cas/SAISA.
Submitted 4 February, 2025; originally announced February 2025.
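Editorial note: "no attention among visual tokens" can be pictured as a boolean attention mask in which visual tokens keep only their own position while text tokens keep causal attention over everything. Whether visual self-attention is retained and where visual tokens sit in the sequence are assumptions here, not details from the paper.

```python
import torch

def naavit_style_mask(n_visual: int, n_text: int) -> torch.Tensor:
    """Boolean attention mask (True = attend) with visual tokens first:
    visual tokens do not attend to *other* visual tokens."""
    n = n_visual + n_text
    mask = torch.tril(torch.ones(n, n, dtype=torch.bool))   # causal baseline
    vis = slice(0, n_visual)
    mask[vis, vis] = torch.eye(n_visual, dtype=torch.bool)  # self only
    return mask

print(naavit_style_mask(n_visual=3, n_text=2).int())
```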
arXiv:2502.01456 [pdf, other] https://arxiv.org/abs/2502.01456
Subjects: Machine Learning (cs.LG); Artificial Intelligence (cs.AI); Computation and Language (cs.CL)
Title: Process Reinforcement through Implicit Rewards
Authors: Ganqu Cui, Lifan Yuan, Zefan Wang, Hanbin Wang, Wendi Li, Bingxiang He, Yuchen Fan, Tianyu Yu, Qixin Xu, Weize Chen, Jiarui Yuan, Huayu Chen, Kaiyan Zhang, Xingtai Lv, Shuo Wang, Yuan Yao, Xu Han, Hao Peng, Yu Cheng, Zhiyuan Liu, Maosong Sun, Bowen Zhou, Ning Ding
Abstract: Dense process rewards have proven a more effective alternative to the sparse outcome-level rewards in the inference-time scaling of large language models (LLMs), particularly in tasks requiring complex multi-step reasoning. While dense rewards also offer an appealing choice for the reinforcement learning (RL) of LLMs, since their fine-grained rewards have the potential to address some inherent issues of outcome rewards, such as training efficiency and credit assignment, this potential remains largely unrealized. This can be primarily attributed to the challenges of training process reward models (PRMs) online, where collecting high-quality process labels is prohibitively expensive, making them particularly vulnerable to reward hacking. To address these challenges, we propose PRIME (Process Reinforcement through IMplicit rEwards), which enables online PRM updates using only policy rollouts and outcome labels, through implicit process rewards. PRIME combines well with various advantage functions and forgoes the dedicated reward-model training phase that existing approaches require, substantially reducing the development overhead. We demonstrate PRIME's effectiveness on competition math and coding. Starting from Qwen2.5-Math-7B-Base, PRIME achieves a 15.1% average improvement across several key reasoning benchmarks over the SFT model. Notably, our resulting model, Eurus-2-7B-PRIME, surpasses Qwen2.5-Math-7B-Instruct on seven reasoning benchmarks with 10% of its training data.
Submitted 3 February, 2025; originally announced February 2025.
Comments: 20 pages. Model & Code & Data available at https://github.com/PRIME-RL/PRIME
arXiv:2502.00700 [pdf, other] https://arxiv.org/abs/2502.00700
Subjects: Computer Vision and Pattern Recognition (cs.CV); Image and Video Processing (eess.IV)
Title: S2CFormer: Reorienting Learned Image Compression from Spatial Interaction to Channel Aggregation
Authors: Yunuo Chen, Qian Li, Bing He, Donghui Feng, Ronghua Wu, Qi Wang, Li Song, Guo Lu, Wenjun Zhang
Abstract: Transformers have achieved significant success in learned image compression (LIC), with Swin Transformers emerging as the mainstream choice for nonlinear transforms. A common belief is that their sophisticated spatial operations contribute most to their efficacy. However, the crucial role of the feed-forward network (FFN)-based channel aggregation module within the transformer architecture has been largely overlooked, and the over-design of spatial operations leads to a suboptimal trade-off between decoding latency and R-D performance. In this paper, we reevaluate the key factors behind the competence of transformers in LIC. By replacing spatial operations with identity mapping, we are surprised to find that channel operations alone can approach the R-D performance of the leading methods. This solid lower bound on performance emphasizes that channel aggregation is more essential for the LIC model to achieve competitive performance, while the previously complex spatial interactions are partly redundant. Based on this insight, we initiate the "S2CFormer" paradigm, a general architecture that reorients the focus of LIC from Spatial Interaction to Channel Aggregation. We present two instantiations of S2CFormer: S2C-Conv and S2C-Attention. Each incorporates a simple operator for spatial interaction and serves as a nonlinear transform block for our LIC models. Both models demonstrate state-of-the-art (SOTA) R-D performance and significantly faster decoding speed. These results also motivate further exploration of advanced FFN structures to enhance R-D performance while maintaining model efficiency. With these foundations, we introduce S2C-Hybrid, an enhanced LIC model that combines the strengths of different S2CFormer instantiations. This model outperforms all the existing methods on several datasets, setting a new benchmark for efficient and high-performance LIC.
Submitted 14 February, 2025; v1 submitted 2 February, 2025; originally announced February 2025.
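Editorial note: a transform block in this spirit pairs a deliberately cheap spatial operator with an FFN that does the heavy lifting via channel aggregation. The depthwise-conv choice, residual wiring, and omitted normalization layers below are simplifications, not the paper's exact S2C-Conv block.

```python
import torch
import torch.nn as nn

class S2CBlock(nn.Module):
    """Sketch of an S2C-style block: simple spatial operator (3x3 depthwise
    conv) followed by an FFN doing channel aggregation via 1x1 convs."""
    def __init__(self, dim: int, expansion: int = 4):
        super().__init__()
        self.spatial = nn.Conv2d(dim, dim, 3, padding=1, groups=dim)
        self.ffn = nn.Sequential(           # channel aggregation
            nn.Conv2d(dim, dim * expansion, 1),
            nn.GELU(),
            nn.Conv2d(dim * expansion, dim, 1),
        )

    def forward(self, x):
        x = x + self.spatial(x)   # cheap spatial interaction
        return x + self.ffn(x)    # channel aggregation carries the capacity

print(S2CBlock(32)(torch.randn(1, 32, 16, 16)).shape)  # (1, 32, 16, 16)
```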
arXiv:2502.00641 [pdf, other] https://arxiv.org/abs/2502.00641
Subjects: Computation and Language (cs.CL); Artificial Intelligence (cs.AI)
Title: Evaluating Small Language Models for News Summarization: Implications and Factors Influencing Performance
Authors: Borui Xu, Yao Chen, Zeyi Wen, Weiguo Liu, Bingsheng He
Abstract: The increasing demand for efficient summarization tools in resource-constrained environments highlights the need for effective solutions. While large language models (LLMs) deliver superior summarization quality, their high computational resource requirements limit their practical application. In contrast, small language models (SLMs) present a more accessible alternative, capable of real-time summarization on edge devices. However, their summarization capabilities and comparative performance against LLMs remain underexplored. This paper addresses this gap by presenting a comprehensive evaluation of 19 SLMs for news summarization across 2,000 news samples, focusing on relevance, coherence, factual consistency, and summary length. Our findings reveal significant variations in SLM performance, with top-performing models such as Phi3-Mini and Llama3.2-3B-Ins achieving results comparable to those of 70B LLMs while generating more concise summaries. Notably, SLMs are better suited to simple prompts, as overly complex prompts may lead to a decline in summary quality. Additionally, our analysis indicates that instruction tuning does not consistently enhance the news summarization capabilities of SLMs. This research not only contributes to the understanding of SLMs but also provides practical insights for researchers seeking efficient summarization solutions that balance performance and resource use.
Submitted 11 February, 2025; v1 submitted 1 February, 2025; originally announced February 2025.
arXiv:2501.15391 [pdf, other] https://arxiv.org/abs/2501.15391
Subjects: Cryptography and Security (cs.CR)
Title: Open Set RF Fingerprinting Identification: A Joint Prediction and Siamese Comparison Framework
Authors: Donghong Cai, Jiahao Shan, Ning Gao, Bingtao He, Yingyang Chen, Shi Jin, Pingzhi Fan
Abstract: Radio Frequency Fingerprinting Identification (RFFI) is a lightweight physical-layer identity authentication technique. It identifies a radio-frequency device by analyzing the signal feature differences caused by inevitable minor hardware impairments. However, existing RFFI methods based on closed-set recognition struggle to detect unknown unauthorized devices in open environments. Moreover, feature interference among legitimate devices can further compromise identification accuracy. In this paper, we propose a joint radio frequency fingerprint prediction and siamese comparison (JRFFP-SC) framework for open set recognition. Specifically, we first employ a radio frequency fingerprint prediction network to predict the most probable category. Then a detailed comparison between the test sample's features and registered samples is performed in a siamese network. The proposed JRFFP-SC framework eliminates inter-class interference and effectively addresses the challenges associated with open set identification. Simulation results show that our proposed JRFFP-SC framework achieves excellent rogue device detection and generalization capability for classifying devices.
Submitted 25 January, 2025; originally announced January 2025.
Comments: 6 pages, 7 figures, conference
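Editorial note: the two-stage decision rule can be sketched as "classify, then verify": take the classifier's top class, compare the query embedding against that class's registered sample, and reject as rogue below a similarity threshold. The cosine similarity and fixed threshold are assumptions; the paper uses a trained siamese network for the comparison.

```python
import numpy as np

def open_set_identify(probs: np.ndarray, query_emb: np.ndarray,
                      registered: dict, threshold: float = 0.8):
    """Two-stage open-set decision: predict, then siamese-style verify."""
    top = int(np.argmax(probs))               # stage 1: most probable category
    ref = registered[top]                     # stage 2: compare to registration
    cos = query_emb @ ref / (np.linalg.norm(query_emb) * np.linalg.norm(ref))
    return top if cos >= threshold else "unknown/rogue"

registered = {0: np.array([1.0, 0.0]), 1: np.array([0.0, 1.0])}
print(open_set_identify(np.array([0.9, 0.1]), np.array([0.97, 0.05]), registered))  # 0
print(open_set_identify(np.array([0.9, 0.1]), np.array([0.10, 0.20]), registered))  # rogue
```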
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15391v1-abstract-full').style.display = 'none'; document.getElementById('2501.15391v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, 7 figures, conference</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.13893">arXiv:2501.13893</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.13893">pdf</a>, <a href="https://arxiv.org/format/2501.13893">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Pix2Cap-COCO: Advancing Visual Comprehension via Pixel-Level Captioning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=You%2C+Z">Zuyao You</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+J">Junke Wang</a>, <a href="/search/?searchtype=author&amp;query=Kong%2C+L">Lingyu Kong</a>, <a href="/search/?searchtype=author&amp;query=He%2C+B">Bo He</a>, <a href="/search/?searchtype=author&amp;query=Wu%2C+Z">Zuxuan Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.13893v1-abstract-short" style="display: inline;"> We present Pix2Cap-COCO, the first panoptic pixel-level caption dataset designed to advance fine-grained visual understanding. To achieve this, we carefully design an automated annotation pipeline that prompts GPT-4V to generate pixel-aligned, instance-specific captions for individual objects within images, enabling models to learn more granular relationships between objects and their contexts. Th&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.13893v1-abstract-full').style.display = 'inline'; document.getElementById('2501.13893v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.13893v1-abstract-full" style="display: none;"> We present Pix2Cap-COCO, the first panoptic pixel-level caption dataset designed to advance fine-grained visual understanding. To achieve this, we carefully design an automated annotation pipeline that prompts GPT-4V to generate pixel-aligned, instance-specific captions for individual objects within images, enabling models to learn more granular relationships between objects and their contexts. This approach results in 167,254 detailed captions, with an average of 22.94 words per caption. 
arXiv:2501.08591 [pdf, other] https://arxiv.org/abs/2501.08591
Subjects: Databases (cs.DB); Artificial Intelligence (cs.AI); Machine Learning (cs.LG)
Title: OpenMLDB: A Real-Time Relational Data Feature Computation System for Online ML
Authors: Xuanhe Zhou, Wei Zhou, Liguo Qi, Hao Zhang, Dihao Chen, Bingsheng He, Mian Lu, Guoliang Li, Fan Wu, Yuqiang Chen
Abstract: Efficient and consistent feature computation is crucial for a wide range of online ML applications. Typically, feature computation is divided into two distinct phases, i.e., an offline stage for model training and an online stage for model serving. These phases often rely on execution engines with different interface languages and function implementations, causing significant inconsistencies. Moreover, many online ML features involve complex time-series computations (e.g., functions over variable-length table windows) that differ from standard streaming and analytical queries. Existing data processing systems (e.g., Spark, Flink, DuckDB) often incur multi-second latencies for these computations, making them unsuitable for real-time online ML applications that demand timely feature updates. This paper presents OpenMLDB, a feature computation system deployed in 4Paradigm's SageOne platform and over 100 real scenarios. Technically, OpenMLDB first employs a unified query plan generator for consistent computation results across the offline and online stages, significantly reducing feature deployment overhead. Second, OpenMLDB provides an online execution engine that resolves performance bottlenecks caused by long-window computations (via pre-aggregation) and multi-table window unions (via data self-adjusting). It also provides a high-performance offline execution engine with window parallel optimization and time-aware data skew resolving. Third, OpenMLDB features a compact data format and stream-focused indexing to maximize memory efficiency and accelerate data access. Evaluations on testing and real workloads reveal significant performance improvements and resource savings compared to the baseline systems. The open community of OpenMLDB now has over 150 contributors and has gained 1.6k stars on GitHub.
Submitted 15 January, 2025; originally announced January 2025.
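Editorial note: the pre-aggregation idea for long windows can be sketched with per-bucket partial sums, so a window aggregate touches O(#buckets) rather than O(#rows). The bucketing scheme and boundary semantics below are simplifying assumptions; OpenMLDB's actual engine is more sophisticated.

```python
import bisect

class PreAggregatedWindow:
    """Per-bucket partial sums for fast long-window aggregates (sketch)."""
    def __init__(self, bucket_ms: int = 60_000):
        self.bucket_ms = bucket_ms
        self.keys = []    # sorted bucket start times
        self.sums = {}    # bucket start -> partial sum

    def insert(self, ts_ms: int, value: float):
        b = ts_ms - ts_ms % self.bucket_ms
        if b not in self.sums:
            bisect.insort(self.keys, b)
            self.sums[b] = 0.0
        self.sums[b] += value

    def window_sum(self, start_ms: int, end_ms: int) -> float:
        # Bucket-granularity answer: only buckets starting inside the window.
        lo = bisect.bisect_left(self.keys, start_ms)
        hi = bisect.bisect_right(self.keys, end_ms)
        return sum(self.sums[k] for k in self.keys[lo:hi])

w = PreAggregatedWindow()
for t in range(0, 600_000, 1000):   # ten minutes of events, one per second
    w.insert(t, 1.0)
print(w.window_sum(0, 299_999))     # first five minutes -> 300 events
```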
arXiv:2501.08579 [pdf, other] https://arxiv.org/abs/2501.08579
Subjects: Computation and Language (cs.CL)
Title: What Limits LLM-based Human Simulation: LLMs or Our Design?
Authors: Qian Wang, Jiaying Wu, Zhenheng Tang, Bingqiao Luo, Nuo Chen, Wei Chen, Bingsheng He
Abstract: We argue that advancing LLM-based human simulation requires addressing both LLMs' inherent limitations and simulation framework design challenges. Recent studies have revealed significant gaps between LLM-based human simulations and real-world observations, highlighting these dual challenges. To address these gaps, we present a comprehensive analysis of LLM limitations and of our design issues, proposing targeted solutions for both aspects. Furthermore, we explore future directions that address both challenges simultaneously, particularly in data collection, LLM generation, and evaluation. To support further research in this field, we provide a curated collection of LLM-based human simulation resources at https://github.com/Persdre/llm-human-simulation.
Submitted 14 January, 2025; originally announced January 2025.
arXiv:2501.04050 [pdf, ps, other] https://arxiv.org/abs/2501.04050
Subjects: Number Theory (math.NT)
Title: The Diophantine equation $(2^{k}-1)(3^{k}-1)=x^{n}$
Authors: Bo He, Chang Liu
Abstract: In this paper, we investigate the Diophantine equation $(2^k - 1)(3^k - 1) = x^n$ and prove that it has no solutions in positive integers $k, x, n > 2$.
Submitted 6 January, 2025; originally announced January 2025.
Comments: 5 pages
MSC Class: 11D41
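Editorial note: the statement is easy to sanity-check empirically for small parameters; the brute-force search below is of course no substitute for the proof, and the ranges are arbitrary.

```python
def integer_root(m: int, n: int):
    """Return x with x**n == m if one exists, else None (binary search)."""
    lo, hi = 1, 1 << (m.bit_length() // n + 2)
    while lo <= hi:
        mid = (lo + hi) // 2
        p = mid ** n
        if p == m:
            return mid
        lo, hi = (mid + 1, hi) if p < m else (lo, mid - 1)
    return None

# Search (2^k - 1)(3^k - 1) = x^n with x, n > 2 over a modest range of (k, n).
hits = [(k, n) for k in range(1, 40) for n in range(3, 30)
        if (x := integer_root((2**k - 1) * (3**k - 1), n)) and x > 2]
print(hits)  # [] -- no solutions in this range, consistent with the theorem
```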
arXiv:2501.03936 [pdf, other] https://arxiv.org/abs/2501.03936
Subjects: Artificial Intelligence (cs.AI); Computation and Language (cs.CL)
Title: PPTAgent: Generating and Evaluating Presentations Beyond Text-to-Slides
Authors: Hao Zheng, Xinyan Guan, Hao Kong, Jia Zheng, Weixiang Zhou, Hongyu Lin, Yaojie Lu, Ben He, Xianpei Han, Le Sun
Abstract: Automatically generating presentations from documents is a challenging task that requires accommodating content quality, visual appeal, and structural coherence. Existing methods primarily focus on improving and evaluating content quality in isolation, overlooking visual appeal and structural coherence, which limits their practical applicability. To address these limitations, we propose PPTAgent, which comprehensively improves presentation generation through a two-stage, edit-based approach inspired by human workflows. PPTAgent first analyzes reference presentations to extract slide-level functional types and content schemas, then drafts an outline and iteratively generates editing actions based on selected reference slides to create new slides. To comprehensively evaluate the quality of generated presentations, we further introduce PPTEval, an evaluation framework that assesses presentations across three dimensions: Content, Design, and Coherence. Results demonstrate that PPTAgent significantly outperforms existing automatic presentation generation methods across all three dimensions.
Submitted 18 February, 2025; v1 submitted 7 January, 2025; originally announced January 2025.
Comments: 8 pages, 23 figures; see https://github.com/icip-cas/PPTAgent for details
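Editorial note: the edit-based stage can be pictured as an interpreter over a small vocabulary of editing actions applied to a slide schema. The action names and schema below are invented for illustration, not PPTAgent's actual API.

```python
def apply_action(slide: dict, action: dict) -> dict:
    """Apply one hypothetical editing action to a slide schema."""
    kind = action["type"]
    if kind == "set_title":
        slide["title"] = action["text"]
    elif kind == "replace_bullet":
        slide["bullets"][action["index"]] = action["text"]
    elif kind == "add_bullet":
        slide["bullets"].append(action["text"])
    else:
        raise ValueError(f"unknown action: {kind}")
    return slide

slide = {"title": "Reference slide", "bullets": ["old point"]}
for act in [{"type": "set_title", "text": "Quarterly results"},
            {"type": "replace_bullet", "index": 0, "text": "Revenue up 12%"},
            {"type": "add_bullet", "text": "Churn down 3%"}]:
    slide = apply_action(slide, act)
print(slide)
```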
arXiv:2501.02795 [pdf, other] https://arxiv.org/abs/2501.02795
Subjects: Computation and Language (cs.CL); Computer Vision and Pattern Recognition (cs.CV)
Title: InfiFusion: A Unified Framework for Enhanced Cross-Model Reasoning via LLM Fusion
Authors: Zhaoyi Yan, Yiming Zhang, Baoyi He, Yuhao Fu, Qi Zhou, Zhijie Sang, Chunlin Ji, Shengyu Zhang, Fei Wu, Hongxia Yang
Abstract: We introduce InfiFusion, an efficient training pipeline designed to integrate multiple domain-specialized Large Language Models (LLMs) into a single pivot model, effectively harnessing the strengths of each source model. Traditional fusion methods either merge model parameters directly or rely on knowledge distillation with rigid assumptions, limiting their flexibility and efficiency. InfiFusion overcomes these limitations by enhancing Universal Logit Distillation (ULD) with Top-K selection and logits standardization. We propose two fusion strategies: Pairwise Fusion (InfiFusion$_p$), in which each source model's knowledge is distilled individually into the pivot model and then merged, and Unified Fusion (InfiFusion$_u$), in which knowledge from all source models is distilled simultaneously into the pivot model. InfiFusion outperforms state-of-the-art models such as Qwen-2.5-14B-Instruct and Phi-4 across 11 widely applied benchmarks covering reasoning, coding, mathematics, and instruction-following tasks. Notably, InfiFusion achieves this superior performance while significantly reducing computational costs, completing full training with only 160 H800 GPU hours compared to the millions typically required for traditional LLM training.
Submitted 16 February, 2025; v1 submitted 6 January, 2025; originally announced January 2025.
Comments: Significant performance improvements over the previous version; under review
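Editorial note: ULD-style distillation is vocabulary-agnostic because it compares *sorted* probability vectors rather than aligned token ids. A minimal sketch combining that idea with the Top-K selection and logits standardization named in the abstract might look as follows; the exact loss in the paper may differ.

```python
import torch
import torch.nn.functional as F

def uld_topk_loss(student_logits: torch.Tensor, teacher_logits: torch.Tensor,
                  k: int = 10) -> torch.Tensor:
    """Sketch: standardize logits, softmax, keep sorted top-k probabilities,
    and take the L1 distance. Works across mismatched vocabularies."""
    def top_probs(logits):
        z = (logits - logits.mean(-1, keepdim=True)) / logits.std(-1, keepdim=True)
        return F.softmax(z, dim=-1).topk(k, dim=-1).values  # sorted descending
    return (top_probs(student_logits) - top_probs(teacher_logits)).abs().sum(-1).mean()

s = torch.randn(4, 32000)   # student batch logits
t = torch.randn(4, 50000)   # teacher with a different vocabulary size
print(uld_topk_loss(s, t).item())
```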
arXiv:2501.01830 [pdf, other] cs.CR, cs.AI, cs.CL
Auto-RT: Automatic Jailbreak Strategy Exploration for Red-Teaming Large Language Models
Authors: Yanjiang Liu, Shuhen Zhou, Yaojie Lu, Huijia Zhu, Weiqiang Wang, Hongyu Lin, Ben He, Xianpei Han, Le Sun
Abstract: Automated red-teaming has become a crucial approach for uncovering vulnerabilities in large language models (LLMs). However, most existing methods focus on isolated safety flaws, limiting their ability to adapt to dynamic defenses and uncover complex vulnerabilities efficiently. To address this challenge, we propose Auto-RT, a reinforcement learning framework that automatically explores and optimizes complex attack strategies to effectively uncover security vulnerabilities through malicious queries. Specifically, we introduce two key mechanisms to reduce exploration complexity and improve strategy optimization: 1) Early-terminated Exploration, which accelerates exploration by focusing on high-potential attack strategies; and 2) a Progressive Reward Tracking algorithm with intermediate downgrade models, which dynamically refines the search trajectory toward successful vulnerability exploitation. Extensive experiments across diverse LLMs demonstrate that, by significantly improving exploration efficiency and automatically optimizing attack strategies, Auto-RT detects a broader range of vulnerabilities, achieving faster detection and 16.63% higher success rates compared to existing methods.
Submitted 3 January, 2025; originally announced January 2025.
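As a rough sketch of the early-termination idea described above (our own toy loop, not the Auto-RT system), a strategy search can stop investing in candidates whose running success estimate stays low; `evaluate`, the thresholds, and the demo strategies are all hypothetical.

```python
import random

def explore_strategies(strategies, evaluate, budget=100, patience=3, floor=0.1):
    """Early-terminated exploration: abandon a candidate attack strategy once
    its running success estimate stays below `floor` after `patience` probes."""
    scores = {}
    for strat in strategies:
        history = []
        for _ in range(budget):
            history.append(evaluate(strat))  # e.g., 1.0 if the probe succeeds
            if len(history) >= patience and sum(history) / len(history) < floor:
                break  # early termination: low-potential strategy
        scores[strat] = sum(history) / len(history)
    return max(scores, key=scores.get)

# Stub demo: strategy "b" succeeds 40% of the time, "a" never does.
demo = {"a": 0.0, "b": 0.4}
best = explore_strategies(list(demo), lambda s: float(random.random() < demo[s]))
```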
arXiv:2412.20058 [pdf, other] physics.soc-ph
Hybrid Machine Learning and Physics-based Modelling of Pedestrian Pushing Behaviours at Bottlenecks
Authors: Qiancheng Xu, Ezel Üsten, Ahmed Alia, Biao He, Renzhong Guo, Mohcine Chraibi
Abstract: In high-density crowds, close proximity between pedestrians makes the steady state highly vulnerable to disruption by pushing behaviours, potentially leading to serious accidents. However, the scarcity of experimental data has hindered systematic studies of their mechanisms and accurate modelling. Using behavioural data from bottleneck experiments, we investigate pedestrian heterogeneity in pushing tendencies, showing that pedestrians tend to push under high motivation and in wider corridors. We introduce a spatial discretization method to encode neighbour states into feature vectors, which serve, together with pedestrian pushing tendencies, as inputs to a random forest model for predicting pushing behaviours. By comparing speed-headway relationships, we reveal that pushing behaviours correspond to an aggressive space-utilization movement strategy. Consequently, we propose a hybrid machine learning and physics-based model integrating heterogeneity in pushing tendencies, prediction of pushing behaviours, and dynamic adjustment of movement strategies. Validations show that the hybrid model effectively reproduces experimental crowd dynamics and is flexible enough to incorporate additional behaviours.
Submitted 28 December, 2024; originally announced December 2024.
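The pipeline in this abstract (discretize neighbour states into a feature vector, then classify with a random forest) can be sketched generically. The sector encoding, radii, and toy labels below are our own assumptions, not the paper's implementation.

```python
import numpy as np
from sklearn.ensemble import RandomForestClassifier

def neighbour_features(positions, me, n_sectors=8, radius=1.5):
    """Hypothetical spatial discretization: count neighbours falling into
    angular sectors around a pedestrian, yielding a fixed-length vector."""
    feats = np.zeros(n_sectors)
    for p in positions:
        d = p - me
        r = np.linalg.norm(d)
        if 0 < r <= radius:
            sector = int((np.arctan2(d[1], d[0]) % (2 * np.pi)) / (2 * np.pi / n_sectors))
            feats[sector] += 1
    return feats

# Toy training run: 8 sector counts plus one pushing-tendency score per sample.
rng = np.random.default_rng(0)
X = rng.random((200, 9))
y = (X[:, -1] + 0.2 * X[:, 0] > 0.8).astype(int)  # synthetic push / no-push labels
clf = RandomForestClassifier(n_estimators=100).fit(X, y)
```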
arXiv:2412.17265 [pdf] cs.MS, cs.AI, cs.CY, math.HO
doi: 10.1007/978-3-031-64302-6_24 (https://doi.org/10.1007/978-3-031-64302-6_24)
Evaluating the Design Features of an Intelligent Tutoring System for Advanced Mathematics Learning
Authors: Ying Fang, Bo He, Zhi Liu, Sannyuya Liu, Zhonghua Yan, Jianwen Sun
Abstract: Xiaomai is an intelligent tutoring system (ITS) designed to help Chinese college students learn advanced mathematics and prepare for the graduate school math entrance exam. This study investigates two distinctive features within Xiaomai: the incorporation of free-response questions with automatic feedback and the metacognitive element of reflecting on self-made errors.
Submitted 22 December, 2024; originally announced December 2024.

arXiv:2412.16686 [pdf, other] cs.CL
NILE: Internal Consistency Alignment in Large Language Models
Authors: Minda Hu, Qiyuan Zhang, Yufei Wang, Bowei He, Hongru Wang, Jingyan Zhou, Liangyou Li, Yasheng Wang, Chen Ma, Irwin King
Abstract: As a crucial step to enhance LLMs' alignment with human intentions, Instruction Fine-Tuning (IFT) places high demands on dataset quality. However, existing IFT datasets often contain knowledge that is inconsistent with LLMs' internal knowledge learned during pre-training, which can greatly affect the efficacy of IFT. To address this issue, we introduce the NILE (iNternal consIstency aLignmEnt) framework, aimed at optimizing IFT datasets to further unlock LLMs' capability. NILE operates by eliciting the target pre-trained LLM's internal knowledge corresponding to instruction data, and this internal knowledge is leveraged to revise the answers in IFT datasets. Additionally, we propose a novel Internal Consistency Filtering (ICF) method to filter training samples, ensuring their high consistency with the LLM's internal knowledge. Our experiments demonstrate that NILE-aligned IFT datasets sharply boost LLM performance across multiple ability evaluation datasets, achieving up to a 66.6% gain on Arena-Hard and 68.5% on Alpaca-Eval V2. Further analysis confirms that each component of the NILE framework contributes to these substantial performance improvements, and provides compelling evidence that dataset consistency with pre-trained internal knowledge is pivotal for maximizing LLM potential.
Submitted 21 December, 2024; originally announced December 2024.
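One way to read the ICF step is as a similarity gate between the dataset answer and the model's own elicited answer. The sketch below is our reading, not the released implementation; `elicit`, `score`, and the threshold are placeholders.

```python
def internal_consistency_filter(samples, elicit, score, threshold=0.7):
    """Illustrative consistency-based filtering: `elicit(instruction)` returns
    the pre-trained model's own answer; `score(a, b)` is any similarity in
    [0, 1], e.g. ROUGE or embedding cosine similarity."""
    kept = []
    for instruction, answer in samples:
        model_answer = elicit(instruction)
        if score(answer, model_answer) >= threshold:  # keep consistent samples
            kept.append((instruction, answer))
    return kept
```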
arXiv:2412.13994 [pdf, other] cs.SI, cs.LG
Modality-Independent Graph Neural Networks with Global Transformers for Multimodal Recommendation
Authors: Jun Hu, Bryan Hooi, Bingsheng He, Yinwei Wei
Abstract: Multimodal recommendation systems can learn users' preferences from existing user-item interactions as well as the semantics of multimodal data associated with items. Many existing methods model this through a multimodal user-item graph, approaching multimodal recommendation as a graph learning task. Graph Neural Networks (GNNs) have shown promising performance in this domain. Prior research has capitalized on GNNs' capability to capture neighborhood information within certain receptive fields (typically denoted by the number of hops, $K$) to enrich user and item semantics. We observe that the optimal receptive fields for GNNs can vary across different modalities. In this paper, we propose GNNs with Modality-Independent Receptive Fields, which employ separate GNNs with independent receptive fields for different modalities to enhance performance. Our results indicate that the optimal $K$ for certain modalities on specific datasets can be as low as 1 or 2, which may restrict the GNNs' capacity to capture global information. To address this, we introduce a Sampling-based Global Transformer, which utilizes uniform global sampling to effectively integrate global information for GNNs. We conduct comprehensive experiments that demonstrate the superiority of our approach over existing methods. Our code is publicly available at https://github.com/CrawlScript/MIG-GT.
Submitted 18 December, 2024; originally announced December 2024.
Comments: Accepted by AAAI 2025
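The core idea of modality-independent receptive fields can be sketched without the full model: give each modality its own message-passing depth and fuse afterwards. This is our simplified illustration (mean aggregation, linear layers, late fusion), not the MIG-GT code at the repository above.

```python
import torch
import torch.nn as nn

class ModalityIndependentGNN(nn.Module):
    """Each modality gets its own stack of message-passing layers, so e.g.
    text may use K=1 hop while images use K=2; outputs are fused afterwards."""
    def __init__(self, dim: int, hops_per_modality: dict[str, int]):
        super().__init__()
        self.layers = nn.ModuleDict({
            m: nn.ModuleList(nn.Linear(dim, dim) for _ in range(k))
            for m, k in hops_per_modality.items()
        })

    def forward(self, feats: dict[str, torch.Tensor], adj: torch.Tensor):
        out = {}
        for m, x in feats.items():
            for layer in self.layers[m]:        # K_m rounds of aggregation
                x = torch.relu(layer(adj @ x))  # adj: normalized N x N adjacency
            out[m] = x
        return torch.stack(list(out.values())).mean(0)  # simple late fusion
```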
arXiv:2412.13549 [pdf, other] cs.CL, cs.AI, cs.LG
EscapeBench: Pushing Language Models to Think Outside the Box
Authors: Cheng Qian, Peixuan Han, Qinyu Luo, Bingxiang He, Xiusi Chen, Yuji Zhang, Hongyi Du, Jiarui Yao, Xiaocheng Yang, Denghui Zhang, Yunzhu Li, Heng Ji
Abstract: Language model agents excel in long-session planning and reasoning, but existing benchmarks primarily focus on goal-oriented tasks with explicit objectives, neglecting creative adaptation in unfamiliar environments. To address this, we introduce EscapeBench, a benchmark suite of room escape game environments designed to challenge agents with creative reasoning, unconventional tool use, and iterative problem-solving to uncover implicit goals. Our results show that current language models, despite employing working memory and Chain-of-Thought reasoning, achieve only 15% average progress without hints, highlighting their limitations in creativity. To bridge this gap, we propose EscapeAgent, a framework designed to enhance creative reasoning through Foresight (innovative tool use) and Reflection (identifying unsolved tasks). Experiments show that EscapeAgent can execute action chains over 1,000 steps while maintaining logical coherence. It navigates and completes games with up to 40% fewer steps and hints, performs robustly across varying difficulty levels, and achieves higher action success rates with more efficient and innovative puzzle-solving strategies. All the data and code are released.
Submitted 18 December, 2024; originally announced December 2024.
Comments: 23 pages, 15 figures

arXiv:2412.12223 [pdf, other] cs.CV, cs.AI
Can video generation replace cinematographers? Research on the cinematic language of generated video
Authors: Xiaozhe Li, Kai WU, Siyi Yang, YiZhan Qu, Guohua Zhang, Zhiyu Chen, Jiayao Li, Jiangchuan Mu, Xiaobin Hu, Wen Fang, Mingliang Xiong, Hao Deng, Qingwen Liu, Gang Li, Bin He
Abstract: Recent advancements in text-to-video (T2V) generation have leveraged diffusion models to enhance the visual coherence of videos generated from textual descriptions. However, most research has primarily focused on object motion, with limited attention given to cinematic language in videos, which is crucial for cinematographers to convey emotion and narrative pacing. To address this limitation, we propose a threefold approach to enhance the ability of T2V models to generate controllable cinematic language. Specifically, we introduce a cinematic language dataset that encompasses shot framing, angle, and camera movement, enabling models to learn diverse cinematic styles. Building on this, to facilitate robust cinematic alignment evaluation, we present CameraCLIP, a model fine-tuned on the proposed dataset that excels in understanding complex cinematic language in generated videos and can further provide valuable guidance in the multi-shot composition process. Finally, we propose CLIPLoRA, a cost-guided dynamic LoRA composition method that facilitates smooth transitions and realistic blending of cinematic language by dynamically fusing multiple pre-trained cinematic LoRAs within a single video. Our experiments demonstrate that CameraCLIP outperforms existing models in assessing the alignment between cinematic language and video, achieving an R@1 score of 0.81. Additionally, CLIPLoRA improves the ability for multi-shot composition, potentially bridging the gap between automatically generated videos and those shot by professional cinematographers.
Submitted 16 December, 2024; originally announced December 2024.
Comments: 13 pages
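Dynamic LoRA composition, as named in the CLIPLoRA description, can be illustrated generically: blend several low-rank adapter deltas into one weight matrix with cost-derived mixing coefficients. The cost-to-weight softmax and all shapes below are our assumptions, not the paper's method.

```python
import torch

def compose_loras(base_weight, loras, costs, temperature=1.0):
    """Blend low-rank adapters into one weight matrix; lower cost scores
    receive a larger share of the mixture (illustrative, not CLIPLoRA)."""
    coeffs = torch.softmax(-torch.tensor(costs) / temperature, dim=0)
    delta = sum(c * (B @ A) for c, (A, B) in zip(coeffs, loras))
    return base_weight + delta

# Usage: two rank-4 adapters over a 64x64 projection, e.g. "dolly-in" vs "pan".
W = torch.randn(64, 64)
lora_a = (torch.randn(4, 64), torch.randn(64, 4))  # (A, B) with delta = B @ A
lora_b = (torch.randn(4, 64), torch.randn(64, 4))
W_mixed = compose_loras(W, [lora_a, lora_b], costs=[0.2, 0.5])
```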
arXiv:2412.11483 [pdf, other] cs.CY, cs.LG
"They've Stolen My GPL-Licensed Model!": Toward Standardized and Transparent Model Licensing
Authors: Moming Duan, Rui Zhao, Linshan Jiang, Nigel Shadbolt, Bingsheng He
Abstract: As model parameter sizes reach the billion-level range and their training consumes zettaFLOPs of computation, component reuse and collaborative development have become increasingly prevalent in the Machine Learning (ML) community. These components, including models, software, and datasets, may originate from various sources and be published under different licenses, which govern the use and distribution of licensed works and their derivatives. However, commonly chosen licenses, such as GPL and Apache, are software-specific and are not clearly defined or bounded in the context of model publishing. Meanwhile, the reused components may also have free-content licenses and model licenses, which pose a potential risk of license noncompliance and rights infringement within the model production workflow. In this paper, we propose addressing the above challenges along two lines: 1) For license analysis, we have developed a new vocabulary for ML workflow management and encoded license rules to enable ontological reasoning for analyzing rights granting and compliance issues. 2) For standardized model publishing, we have drafted a set of model licenses that provide flexible options to meet the diverse needs of model publishing. Our analysis tool is built on the Turtle language and the Notation3 reasoning engine, envisioned as a first step toward Linked Open Model Production Data. We have also encoded our proposed model licenses into rules and demonstrated the effects of GPL and other commonly used licenses in model publishing, along with the flexibility advantages of our licenses, through comparisons and experiments.
Submitted 16 December, 2024; originally announced December 2024.
Comments: 12 pages, 6 figures. Under review
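To give a feel for rule-based license compliance checking of the kind this abstract describes (the paper uses Turtle and a Notation3 reasoner), here is a deliberately tiny toy in plain Python; the license table and the "copyleft propagates to derivatives" rule are our own simplifications, not the authors' encoded rules.

```python
# Toy license-compatibility check over a model production workflow.
LICENSES = {
    "GPL-3.0":    {"copyleft": True,  "commercial": True},
    "Apache-2.0": {"copyleft": False, "commercial": True},
    "CC-BY-NC":   {"copyleft": False, "commercial": False},
}

def check_derivative(component_licenses, target_license, commercial_use):
    issues = []
    for name in component_licenses:
        rules = LICENSES[name]
        if rules["copyleft"] and target_license != name:
            issues.append(f"{name} requires the derivative to stay {name}")
        if commercial_use and not rules["commercial"]:
            issues.append(f"{name} forbids commercial use")
    return issues

print(check_derivative(["GPL-3.0", "CC-BY-NC"], "Apache-2.0", commercial_use=True))
```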
arXiv:2412.09917 [pdf, other] nucl-th
Improving the predictive power of empirical shell-model Hamiltonians
Authors: J. A. Purcell, B. A. Brown, B. C. He, S. R. Stroberg, W. B. Walters
Abstract: We present two developments which enhance the predictive power of empirical shell-model Hamiltonians for cases in which calibration data is sparse. A recent improvement in the ab initio derivation of effective Hamiltonians leads to a much better starting point for the optimization procedure. In addition, we introduce a protocol to avoid over-fitting, enabling a more reliable extrapolation beyond available data. These developments will enable more robust predictions for exotic isotopes produced at rare isotope beam facilities and in astrophysical environments.
Submitted 13 December, 2024; originally announced December 2024.
Comments: 11 pages, 6 figures

arXiv:2412.05243 [pdf, other] cs.CV, cs.AI, cs.LG
CompCap: Improving Multimodal Large Language Models with Composite Captions
Authors: Xiaohui Chen, Satya Narayan Shukla, Mahmoud Azab, Aashu Singh, Qifan Wang, David Yang, ShengYun Peng, Hanchao Yu, Shen Yan, Xuewen Zhang, Baosheng He
Abstract: How well can Multimodal Large Language Models (MLLMs) understand composite images? Composite images (CIs) are synthetic visuals created by merging multiple visual elements, such as charts, posters, or screenshots, rather than being captured directly by a camera. While CIs are prevalent in real-world applications, recent MLLM developments have primarily focused on interpreting natural images (NIs). Our research reveals that current MLLMs face significant challenges in accurately understanding CIs, often struggling to extract information or perform complex reasoning based on these images. We find that existing training data for CIs are mostly formatted for question-answer tasks (e.g., in datasets like ChartQA and ScienceQA), while high-quality image-caption datasets, critical for robust vision-language alignment, are only available for NIs. To bridge this gap, we introduce Composite Captions (CompCap), a flexible framework that leverages Large Language Models (LLMs) and automation tools to synthesize CIs with accurate and detailed captions. Using CompCap, we curate CompCap-118K, a dataset containing 118K image-caption pairs across six CI types. We validate the effectiveness of CompCap-118K by supervised fine-tuning of MLLMs at three sizes: xGen-MM-inst.-4B and LLaVA-NeXT-Vicuna-7B/13B. Empirical results show that CompCap-118K significantly enhances MLLMs' understanding of CIs, yielding average gains of 1.7%, 2.0%, and 2.9% across eleven benchmarks, respectively.
Submitted 6 December, 2024; originally announced December 2024.
arXiv:2412.00020 [pdf, other] cs.AI, cs.LG, cs.SI
Partitioning Message Passing for Graph Fraud Detection
Authors: Wei Zhuo, Zemin Liu, Bryan Hooi, Bingsheng He, Guang Tan, Rizal Fathony, Jia Chen
Abstract: Label imbalance and homophily-heterophily mixture are the fundamental problems encountered when applying Graph Neural Networks (GNNs) to Graph Fraud Detection (GFD) tasks. Existing GNN-based GFD models are designed to augment the graph structure to accommodate the inductive bias of GNNs towards homophily, excluding heterophilic neighbors during message passing. In our work, we argue that the key to applying GNNs for GFD is not to exclude but to distinguish neighbors with different labels. Grounded in this perspective, we introduce Partitioning Message Passing (PMP), an intuitive yet effective message passing paradigm expressly crafted for GFD. Specifically, in the neighbor aggregation stage of PMP, neighbors with different classes are aggregated with distinct node-specific aggregation functions. By this means, the center node can adaptively adjust the information aggregated from its heterophilic and homophilic neighbors, thus avoiding the model gradient being dominated by the benign nodes that make up the majority of the population. We theoretically establish a connection between the spatial formulation of PMP and spectral analysis, showing that PMP operates as an adaptive node-specific spectral graph filter, which demonstrates its capability to handle heterophily-homophily mixed graphs. Extensive experimental results show that PMP can significantly boost performance on GFD tasks.
Submitted 16 November, 2024; originally announced December 2024.
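The partitioning idea in this abstract lends itself to a compact sketch: route messages from differently-labeled neighbors through separate transformations. This is our simplified illustration (known neighbor labels, dense adjacency, shared rather than node-specific weights), not the PMP implementation.

```python
import torch
import torch.nn as nn

class PartitionedAggregation(nn.Module):
    """Class-partitioned neighbor aggregation in the spirit of PMP:
    fraud-labeled and benign-labeled neighbors pass through separate
    linear maps before being combined at the center node."""
    def __init__(self, dim: int):
        super().__init__()
        self.w_fraud, self.w_benign, self.w_self = (nn.Linear(dim, dim) for _ in range(3))

    def forward(self, x, adj, labels):
        # labels: known training labels of neighbors (1 = fraud, 0 = benign).
        fraud_mask = (labels == 1).float().unsqueeze(0)          # 1 x N column mask
        h_fraud = self.w_fraud((adj * fraud_mask) @ x)           # fraud-neighbor messages
        h_benign = self.w_benign((adj * (1 - fraud_mask)) @ x)   # benign-neighbor messages
        return torch.relu(self.w_self(x) + h_fraud + h_benign)
```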
arXiv:2411.19703 [pdf, other] physics.optics
Highly coherent two-color laser with stability below 3E-17 at 1 second
Authors: Bibo He, Jiachuan Yang, Fei Meng, Jialiang Yu, Chenbo Zhang, Qi-Fan Yang, Yani Zuo, Yige Lin, Zhangyuan Chen, Zhanjun Fang, Xiaopeng Xie
Abstract: Two-color lasers with high coherence are paramount in precision measurement, accurate light-matter interaction, and low-noise photonic microwave generation. However, conventional two-color lasers often suffer from low coherence, particularly when the two colors are separated by large frequency spacings. Here, harnessing the Pound-Drever-Hall technique, we synchronize two lasers to a shared ultra-stable optical reference cavity to break through the thermal noise constraint, achieving a highly coherent two-color laser. By suppressing the non-common-mode noises, we demonstrate an exceptional fractional frequency instability of 2.7E-17 at 1 second when normalized to the optical frequency. Characterizing coherence across large frequency spacings poses a significant challenge. To tackle this, we employ electro-optical frequency division to transfer the relative stability of a two-color laser with 0.5 THz spacing to a 25 GHz microwave signal. As its performance surpasses the sensitivity of the current apparatus, we establish two independent systems for comparative analyses. The resulting 25 GHz signals exhibit exceptional phase noise of -74 dBc/Hz at 1 Hz and -120 dBc/Hz at 100 Hz, demonstrating that the two-color laser's performance approaches the quantum noise limit of its synchronization system. It also sets a new record for the two-point frequency division method in photonic microwave generation. Our achievement in highly coherent two-color lasers and low-noise microwave signals will usher in a new era for precision measurements and refine the accuracy of light-matter and microwave-matter interactions to their next decimal place.
Submitted 29 November, 2024; originally announced November 2024.

arXiv:2411.15492 [pdf, other] nucl-th
Reduced Basis Method for Few-body Bound State Emulation
Authors: R. Y. Cheng, K. Godbey, Y. B. Niu, Y. G. Ma, W. B. He, S. M. Wang
Abstract: Recent advances in both theoretical and computational methods have enabled large-scale, precision calculations of the properties of atomic nuclei. With the growing complexity of modern nuclear theory, however, also comes the need for novel methods to perform systematic studies and quantify the uncertainties of models when confronted with experimental data. This study presents an application of such an approach, the reduced basis method, which substantially lowers computational costs by constructing a significantly smaller Hamiltonian subspace informed by previous solutions. Our method shows efficiency and accuracy comparable to other dimensionality reduction techniques on an artificial three-body bound system, while providing a richer representation of physical information in its projection and training subspace. This methodological advancement can be applied in other contexts and has the potential to greatly improve our ability to systematically explore theoretical models and thus enhance our understanding of the fundamental properties of nuclear systems.
Submitted 23 November, 2024; originally announced November 2024.
Comments: 8 pages, 5 figures
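The generic reduced basis method named in this abstract has a standard numerical core: orthonormalize ground-state snapshots from training parameter values, then solve the small projected eigenproblem. The sketch below shows that core for a toy linear-in-parameter Hamiltonian; it is a generic RBM illustration, not the paper's code.

```python
import numpy as np

def rbm_emulate(snapshots, h_new):
    """Build an orthonormal basis from ground-state snapshots, project the
    new Hamiltonian onto it, and solve the much smaller eigenproblem."""
    basis, _ = np.linalg.qr(np.column_stack(snapshots))  # orthonormalize snapshots
    h_red = basis.T @ h_new @ basis                      # n_train x n_train projection
    evals, evecs = np.linalg.eigh(h_red)
    return evals[0], basis @ evecs[:, 0]                 # emulated energy and state

# Toy usage: H(c) = H0 + c*V; train at three couplings, emulate at a fourth.
rng = np.random.default_rng(1)
A, B = rng.random((50, 50)), rng.random((50, 50))
H0, V = A + A.T, B + B.T                                 # symmetric toy operators
snapshots = [np.linalg.eigh(H0 + c * V)[1][:, 0] for c in (0.1, 0.5, 0.9)]
energy, state = rbm_emulate(snapshots, H0 + 0.3 * V)
```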

arXiv:2411.11504 (https://arxiv.org/abs/2411.11504) [pdf, other]
Subjects: cs.AI (Artificial Intelligence); cs.CL (Computation and Language); stat.ML (Machine Learning)
Title: Search, Verify and Feedback: Towards Next Generation Post-training Paradigm of Foundation Models via Verifier Engineering
Authors: Xinyan Guan, Yanjiang Liu, Xinyu Lu, Boxi Cao, Ben He, Xianpei Han, Le Sun, Jie Lou, Bowen Yu, Yaojie Lu, Hongyu Lin
Abstract: The evolution of machine learning has increasingly prioritized the development of powerful models and more scalable supervision signals. However, the emergence of foundation models presents significant challenges in providing effective supervision signals necessary for further enhancing their capabilities. Consequently, there is an urgent need to explore novel supervision signals and technical approaches. In this paper, we propose verifier engineering, a novel post-training paradigm specifically designed for the era of foundation models. The core of verifier engineering involves leveraging a suite of automated verifiers to perform verification tasks and deliver meaningful feedback to foundation models. We systematically categorize the verifier engineering process into three essential stages: search, verify, and feedback, and provide a comprehensive review of state-of-the-art research developments within each stage. We believe that verifier engineering constitutes a fundamental pathway toward achieving Artificial General Intelligence.
Submitted 18 November, 2024; originally announced November 2024.
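
The three stages named in this abstract (search, verify, feedback) form a natural post-training loop. A schematic sketch of one step of that loop, with every callable a hypothetical placeholder rather than an API from the paper:

    from typing import Callable, List

    def post_train_step(model, prompt: str,
                        sample: Callable,           # search: draw candidate responses
                        verifiers: List[Callable],  # verify: score each candidate
                        update: Callable) -> None:  # feedback: push signal into model
        candidates = [sample(model, prompt) for _ in range(8)]        # search
        scored = [(c, sum(v(prompt, c) for v in verifiers))           # verify
                  for c in candidates]
        best, score = max(scored, key=lambda pair: pair[1])
        update(model, prompt, best, score)                            # feedback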

arXiv:2411.08212 (https://arxiv.org/abs/2411.08212) [pdf, other]
Subjects: cs.LG (Machine Learning); cs.AI (Artificial Intelligence)
Title: PERFT: Parameter-Efficient Routed Fine-Tuning for Mixture-of-Expert Model
Authors: Yilun Liu, Yunpu Ma, Shuo Chen, Zifeng Ding, Bailan He, Zhen Han, Volker Tresp
Abstract: The Mixture-of-Experts (MoE) paradigm has emerged as a powerful approach for scaling transformers with improved resource utilization. However, efficiently fine-tuning MoE models remains largely underexplored. Inspired by recent works on Parameter-Efficient Fine-Tuning (PEFT), we present a unified framework for integrating PEFT modules directly into the MoE mechanism. Aligning with the core principles and architecture of MoE, our framework encompasses a set of design dimensions including various functional and composition strategies. By combining design choices within our framework, we introduce Parameter-Efficient Routed Fine-Tuning (PERFT) as a flexible and scalable family of PEFT strategies tailored for MoE models. Extensive experiments on adapting OLMoE-1B-7B and Mixtral-8x7B for commonsense and arithmetic reasoning tasks demonstrate the effectiveness, scalability, and intriguing dynamics of PERFT. Additionally, we provide empirical findings for each specific design choice to facilitate better application of MoE and PEFT.
Submitted 12 November, 2024; originally announced November 2024.
Comments: Code available via https://anonymous.4open.science/r/PERFT-MoE/
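
One way to read "integrating PEFT modules directly into the MoE mechanism" is to route tokens among small low-rank adapters the same way an MoE layer routes among experts. A minimal PyTorch-style sketch under that assumption (not the authors' code; see the repository linked in the comments above):

    import torch
    import torch.nn as nn

    class RoutedAdapters(nn.Module):
        """Hypothetical sketch: a router dispatches tokens across small
        low-rank adapters running alongside the (frozen) MoE experts."""
        def __init__(self, d_model: int, n_adapters: int = 4, r: int = 8):
            super().__init__()
            self.router = nn.Linear(d_model, n_adapters)
            self.down = nn.ModuleList(nn.Linear(d_model, r) for _ in range(n_adapters))
            self.up = nn.ModuleList(nn.Linear(r, d_model) for _ in range(n_adapters))

        def forward(self, x: torch.Tensor) -> torch.Tensor:  # x: (tokens, d_model)
            gates = torch.softmax(self.router(x), dim=-1)    # (tokens, n_adapters)
            outs = torch.stack([up(dn(x)) for dn, up in zip(self.down, self.up)],
                               dim=-1)                       # (tokens, d_model, n)
            return x + (outs * gates.unsqueeze(1)).sum(-1)   # residual update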

arXiv:2411.03250 (https://arxiv.org/abs/2411.03250) [pdf, other]
Subjects: cs.LG (Machine Learning); cs.AI (Artificial Intelligence); cs.CL (Computation and Language)
Title: DiffLM: Controllable Synthetic Data Generation via Diffusion Language Models
Authors: Ying Zhou, Xinyao Wang, Yulei Niu, Yaojie Shen, Lexin Tang, Fan Chen, Ben He, Le Sun, Longyin Wen
Abstract: Recent advancements in large language models (LLMs) have significantly enhanced their knowledge and generative capabilities, leading to a surge of interest in leveraging LLMs for high-quality data synthesis. However, synthetic data generation via prompting LLMs remains challenging due to LLMs' limited understanding of target data distributions and the complexity of prompt engineering, especially for structured data. To address these issues, we introduce DiffLM, a controllable data synthesis framework based on a variational autoencoder (VAE), which further (1) leverages diffusion models to preserve more information of the original distribution and format structure in the learned latent distribution, and (2) decouples the learning of target distribution knowledge from the LLM's generative objectives via a plug-and-play latent feature injection module. As we observed significant discrepancies between the VAE's latent representations and the real data distribution, the latent diffusion module is introduced into our framework to learn a fully expressive latent distribution. Evaluations on seven real-world datasets with structured data (i.e., tabular, code, and tool data) demonstrate that DiffLM generates high-quality data, with performance on downstream tasks surpassing that of real data by 2-7 percent in certain cases. The data and code will be publicly available upon completion of internal review.
Submitted 5 November, 2024; originally announced November 2024.
Comments: 17 pages, 8 figures
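
The pipeline in this abstract has three pieces: a VAE that embeds real records, a diffusion model fitted to the resulting latents, and latent-feature injection into the LLM at generation time. A structural skeleton only, with all three components passed in as hypothetical objects:

    class DiffLMSketch:
        """Hypothetical skeleton of the three-part pipeline described above."""
        def __init__(self, vae, latent_diffusion, llm):
            self.vae = vae                 # encodes real records into latents
            self.diff = latent_diffusion   # models the latent distribution
            self.llm = llm                 # generator with a latent-injection hook

        def fit(self, records):
            z = self.vae.encode(records)       # learn the latent space from real data
            self.diff.train_on(z)              # diffusion captures the latent law

        def sample(self, n: int):
            z = self.diff.sample(n)            # draw latents from the learned law
            return self.llm.generate(inject=z) # plug-and-play feature injection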

arXiv:2411.02608 (https://arxiv.org/abs/2411.02608) [pdf, other]
Subjects: cs.RO (Robotics)
Title: SSFold: Learning to Fold Arbitrary Crumpled Cloth Using Graph Dynamics from Human Demonstration
Authors: Changshi Zhou, Haichuan Xu, Jiarui Hu, Feng Luan, Zhipeng Wang, Yanchao Dong, Yanmin Zhou, Bin He
Abstract: Robotic cloth manipulation faces challenges due to the fabric's complex dynamics and the high dimensionality of configuration spaces. Previous methods have largely focused on isolated smoothing or folding tasks and have been overly reliant on simulations, often failing to bridge the significant sim-to-real gap in deformable object manipulation. To overcome these challenges, we propose a two-stream architecture with sequential and spatial pathways, unifying smoothing and folding tasks into a single adaptable policy model that accommodates various cloth types and states. The sequential stream determines the pick and place positions for the cloth, while the spatial stream, using a connectivity dynamics model, constructs a visibility graph from partial point cloud data of the self-occluded cloth, allowing the robot to infer the cloth's full configuration from incomplete observations. To bridge the sim-to-real gap, we utilize a hand tracking detection algorithm to gather and integrate human demonstration data into our novel end-to-end neural network, improving real-world adaptability. Our method, validated on a UR5 robot across four distinct cloth folding tasks with different goal shapes, consistently achieves folded states from arbitrary crumpled initial configurations, with success rates of 99%, 99%, 83%, and 67%. It outperforms existing state-of-the-art cloth manipulation techniques and demonstrates strong generalization to unseen cloth with diverse colors, shapes, and stiffness in real-world experiments. Videos and source code are available at: https://zcswdt.github.io/SSFold/
Submitted 24 October, 2024; originally announced November 2024.
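
The spatial stream builds a visibility graph over a partial point cloud so the robot can reason about the occluded cloth. The paper learns this connectivity; as a simple stand-in, here is a k-nearest-neighbor graph over raw points, one common baseline for such a structure:

    import numpy as np

    def knn_graph(points: np.ndarray, k: int = 8) -> list:
        """Directed edges from each point to its k nearest neighbors.
        A crude stand-in for the learned connectivity dynamics model."""
        d = np.linalg.norm(points[:, None, :] - points[None, :, :], axis=-1)
        np.fill_diagonal(d, np.inf)          # no self-edges
        return [(i, int(j))
                for i, row in enumerate(d)
                for j in np.argsort(row)[:k]]

    cloud = np.random.rand(200, 3)           # partial point cloud of the cloth
    print(len(knn_graph(cloud)))             # 200 * 8 edges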

arXiv:2411.01215 (https://arxiv.org/abs/2411.01215) [pdf, other]
Subjects: astro-ph.HE (High Energy Astrophysical Phenomena)
Title: Detection of two TeV gamma-ray outbursts from NGC 1275 by LHAASO
Authors: Zhen Cao, F. Aharonian, Axikegu, Y. X. Bai, Y. W. Bao, D. Bastieri, X. J. Bi, Y. J. Bi, J. T. Cai, Q. Cao, W. Y. Cao, Zhe Cao, J. Chang, J. F. Chang, A. M. Chen, E. S. Chen, Liang Chen, Lin Chen, Long Chen, M. J. Chen, M. L. Chen, Q. H. Chen, S. H. Chen, S. Z. Chen, T. L. Chen, et al. (254 additional authors not shown)
Abstract: The Water Cherenkov Detector Array (WCDA) is one of the components of the Large High Altitude Air Shower Observatory (LHAASO) and can monitor any source over two-thirds of the sky for up to 7 hours per day with a >98% duty cycle. In this work, we report two outbursts of the Fanaroff-Riley I radio galaxy NGC 1275 detected by LHAASO-WCDA between November 2022 and January 2023, with statistical significances of 5.2$\sigma$ and 8.3$\sigma$. The observed spectral energy distribution in the range from 500 GeV to 3 TeV is fitted by a power law with best-fit spectral indices of $\alpha=-3.37\pm0.52$ and $-3.35\pm0.29$, respectively. The outburst fluxes above 0.5 TeV were $(4.55\pm4.21)\times10^{-11}~\rm cm^{-2}~s^{-1}$ and $(3.45\pm1.78)\times10^{-11}~\rm cm^{-2}~s^{-1}$, corresponding to 60% and 45% of the Crab Nebula flux, respectively. Variability analysis reveals a variability time-scale of days in the TeV band. A simple one-zone synchrotron self-Compton model reproduces the gamma-ray data well.
Submitted 5 November, 2024; v1 submitted 2 November, 2024; originally announced November 2024.
Comments: 11 pages, 8 figures, 3 tables
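
Given a photon index, the integral flux above a threshold follows from integrating the power law dN/dE = N0 (E/E0)^alpha. A quick numerical check in Python, with the normalization N0 chosen arbitrarily for illustration:

    import numpy as np
    from scipy.integrate import quad

    alpha, E0 = -3.35, 1.0         # reported best-fit index; pivot at 1 TeV
    N0 = 1e-11                     # arbitrary normalization, cm^-2 s^-1 TeV^-1

    dNdE = lambda E: N0 * (E / E0) ** alpha
    flux_num, _ = quad(dNdE, 0.5, np.inf)            # photon flux above 0.5 TeV

    # Closed form for alpha < -1: N0*E0/(-(alpha+1)) * (Emin/E0)**(alpha+1)
    flux_ana = N0 * E0 / -(alpha + 1) * (0.5 / E0) ** (alpha + 1)
    print(flux_num, flux_ana)                        # the two agree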

arXiv:2410.21809 (https://arxiv.org/abs/2410.21809) [pdf]
Subjects: physics.optics (Optics); physics.med-ph (Medical Physics)
Title: First-in-human spinal cord tumor imaging with fast adaptive focus tracking robotic-OCT
Authors: Bin He, Yuzhe Ying, Yejiong Shi, Zhe Meng, Zichen Yin, Zhengyu Chen, Zhangwei Hu, Ruizhi Xue, Linkai Jing, Yang Lu, Zhenxing Sun, Weitao Man, Youtu Wu, Dan Lei, Ning Zhang, Guihuai Wang, Ping Xue
Abstract: Current surgical procedures for spinal cord tumors lack in vivo high-resolution, high-speed multifunctional imaging systems, posing challenges for precise tumor resection and intraoperative decision-making. This study introduces the Fast Adaptive Focus Tracking Robotic Optical Coherence Tomography (FACT-ROCT) system, designed to overcome these obstacles by providing real-time, artifact-free multifunctional imaging of spinal cord tumors during surgery. By integrating cross-scanning, adaptive focus tracking, and robotics, the system addresses motion artifacts and resolution degradation from tissue movement, achieving wide-area, high-resolution imaging. We conducted intraoperative imaging on 21 patients, including 13 with spinal gliomas and 8 with other tumors. This study marks the first demonstration of OCT in situ imaging of human spinal cord tumors, providing micrometer-scale in vivo structural images and demonstrating FACT-ROCT's potential to differentiate various tumor types in real time. Analysis of the attenuation coefficients of spinal gliomas revealed increased heterogeneity with higher malignancy grades. We therefore proposed the standard deviation of the attenuation coefficient as a physical marker, achieving over 90% accuracy in distinguishing high- from low-grade gliomas intraoperatively at a fixed threshold. FACT-ROCT also enabled extensive in vivo microvascular imaging of spinal cord tumors, covering 70 mm x 13 mm x 10 mm within 2 minutes. Quantitative vascular tortuosity comparisons confirmed greater tortuosity in higher-grade tumors. The ability to perform extensive vascular imaging and real-time tumor grading during surgery provides critical information for surgical strategy, such as minimizing intraoperative bleeding and optimizing tumor resection while preserving functional tissue.
Submitted 29 October, 2024; v1 submitted 29 October, 2024; originally announced October 2024.
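
The proposed marker is simple to state: estimate an attenuation coefficient per A-scan, then use its standard deviation across a region as the grading feature. A sketch assuming the attenuation is taken from the slope of log intensity versus depth (a standard single-scattering estimate, not necessarily the paper's estimator):

    import numpy as np

    def attenuation_per_ascan(bscan: np.ndarray, dz_mm: float) -> np.ndarray:
        """Per-A-scan attenuation from the log-intensity slope over depth.
        bscan: (n_ascans, n_depth) array of linear OCT intensities."""
        depth = np.arange(bscan.shape[1]) * dz_mm
        log_i = np.log(np.clip(bscan, 1e-12, None))
        slopes = np.polyfit(depth, log_i.T, deg=1)[0]   # one slope per A-scan
        return -slopes / 2.0                            # round-trip attenuation

    def grading_marker(bscan: np.ndarray, dz_mm: float = 0.004) -> float:
        mu = attenuation_per_ascan(bscan, dz_mm)
        return float(np.std(mu))   # higher heterogeneity suggests higher grade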

arXiv:2410.18333 (https://arxiv.org/abs/2410.18333) [pdf, other]
Subjects: cs.RO (Robotics); cs.AI (Artificial Intelligence)
Title: Search-Based Path Planning among Movable Obstacles
Authors: Zhongqiang Ren, Bunyod Suvonov, Guofei Chen, Botao He, Yijie Liao, Cornelia Fermuller, Ji Zhang
Abstract: This paper investigates Path planning Among Movable Obstacles (PAMO), which seeks a minimum-cost collision-free path among static obstacles from start to goal while allowing the robot to push away movable obstacles (i.e., objects) along its path when needed. To develop planners that are complete and optimal for PAMO, the planner has to search a giant state space involving both the location of the robot and the locations of the objects, which grows exponentially with the number of objects. The main idea in this paper is that only a small fraction of this giant state space needs to be explored during planning, as guided by a heuristic, while most of the objects far away from the robot remain untouched, which leads to runtime-efficient algorithms. Based on this idea, this paper introduces two PAMO formulations, i.e., bi-objective and resource-constrained problems in an occupancy grid, and develops PAMO*, a search method with completeness and solution optimality guarantees, to solve the two problems. We then further extend PAMO* to hybrid-state PAMO* to plan in continuous spaces with high-fidelity interaction between the robot and the objects. Our results show that PAMO* can often find optimal solutions within a second in cluttered environments with up to 400 objects.
Submitted 23 October, 2024; originally announced October 2024.
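
The crucial design point is the search state: the robot's cell plus the poses of only the objects moved so far, so that untouched objects add no branching. An illustrative sketch of such a state and a push-aware successor function, where grid and objects are hypothetical helpers:

    from dataclasses import dataclass

    @dataclass(frozen=True)
    class PamoState:
        """Robot cell plus positions of only the objects moved so far;
        untouched objects stay implicit, keeping the search space small."""
        robot: tuple
        moved: frozenset          # set of (object_id, cell) pairs

    def successors(s: PamoState, grid, objects):
        for dx, dy in ((1, 0), (-1, 0), (0, 1), (0, -1)):
            nxt = (s.robot[0] + dx, s.robot[1] + dy)
            if grid.is_static_obstacle(nxt):
                continue
            obj = objects.at(nxt, s.moved)        # movable object in the way?
            if obj is None:
                yield PamoState(nxt, s.moved), 1.0            # plain move
            else:
                push_to = (nxt[0] + dx, nxt[1] + dy)
                if grid.free_for_object(push_to, s.moved):    # push it ahead
                    moved = (s.moved - {(obj, nxt)}) | {(obj, push_to)}
                    yield PamoState(nxt, frozenset(moved)), 1.0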

arXiv:2410.17986 (https://arxiv.org/abs/2410.17986) [pdf, other]
Subjects: cs.LG (Machine Learning); cs.AI (Artificial Intelligence); cs.CR (Cryptography and Security)
Title: Federated Transformer: Multi-Party Vertical Federated Learning on Practical Fuzzily Linked Data
Authors: Zhaomin Wu, Junyi Hou, Yiqun Diao, Bingsheng He
Abstract: Federated Learning (FL) is an evolving paradigm that enables multiple parties to collaboratively train models without sharing raw data. Among its variants, Vertical Federated Learning (VFL) is particularly relevant in real-world, cross-organizational collaborations, where distinct features of a shared instance group are contributed by different parties. In these scenarios, parties are often linked using fuzzy identifiers, leading to a common practice termed multi-party fuzzy VFL. Existing models generally address either multi-party VFL or fuzzy VFL between two parties. Extending these models to practical multi-party fuzzy VFL typically results in significant performance degradation and increased costs for maintaining privacy. To overcome these limitations, we introduce the Federated Transformer (FeT), a novel framework that supports multi-party VFL with fuzzy identifiers. FeT innovatively encodes these identifiers into data representations and employs a transformer architecture distributed across different parties, incorporating three new techniques to enhance performance. Furthermore, we have developed a multi-party privacy framework for VFL that integrates differential privacy with secure multi-party computation, effectively protecting local representations while minimizing associated utility costs. Our experiments demonstrate that FeT surpasses baseline models by up to 46% in accuracy when scaled to 50 parties. Additionally, in two-party fuzzy VFL settings, FeT also shows improved performance and privacy over cutting-edge VFL models.
Submitted 23 October, 2024; originally announced October 2024.
Journal ref: 38th Conference on Neural Information Processing Systems (NeurIPS 2024)

arXiv:2410.13699 (https://arxiv.org/abs/2410.13699) [pdf, other]
Subjects: cs.CL (Computation and Language)
Title: Unconstrained Model Merging for Enhanced LLM Reasoning
Authors: Yiming Zhang, Baoyi He, Shengyu Zhang, Yuhao Fu, Qi Zhou, Zhijie Sang, Zijin Hong, Kejing Yang, Wenjun Wang, Jianbo Yuan, Guanghan Ning, Linyi Li, Chunlin Ji, Fei Wu, Hongxia Yang
Abstract: Recent advancements in building domain-specific large language models (LLMs) have shown remarkable success, especially in tasks requiring reasoning abilities like logical inference over complex relationships and multi-step problem solving. However, creating a powerful all-in-one LLM remains challenging due to the need for proprietary data and vast computational resources. As a resource-friendly alternative, we explore the potential of merging multiple expert models into a single LLM. Existing studies on model merging mainly focus on generalist LLMs instead of domain experts, or on LLMs of the same architecture and size. In this work, we propose an unconstrained model merging framework that accommodates both homogeneous and heterogeneous model architectures, with a focus on reasoning tasks. A fine-grained layer-wise weight merging strategy is designed for homogeneous model merging, while heterogeneous model merging is built upon probabilistic distribution knowledge derived from instruction-response fine-tuning data. Across 7 benchmarks and 9 reasoning-optimized LLMs, we reveal a key finding: combinatorial reasoning emerges from merging and surpasses simple additive effects. We propose that unconstrained model merging could serve as a foundation for decentralized LLMs, marking a notable progression from the existing centralized LLM framework. This evolution could encourage wider participation and stimulate further advances in artificial intelligence, effectively addressing the constraints posed by centralized models.
Submitted 21 October, 2024; v1 submitted 17 October, 2024; originally announced October 2024.
Comments: Under review, correct typos
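
For homogeneous models, layer-wise weight merging amounts to interpolating matched parameters with per-layer coefficients. A minimal illustration over PyTorch state dicts; uniform weights stand in for the paper's fine-grained coefficients:

    import torch

    def merge_layerwise(state_dicts: list, layer_weights: dict) -> dict:
        """Weighted average of same-architecture checkpoints.
        layer_weights[name] holds one coefficient per source model;
        missing entries fall back to a uniform average."""
        n = len(state_dicts)
        merged = {}
        for name in state_dicts[0]:
            w = layer_weights.get(name, [1.0 / n] * n)
            merged[name] = sum(wi * sd[name].float()
                               for wi, sd in zip(w, state_dicts))
        return merged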

arXiv:2410.12707 (https://arxiv.org/abs/2410.12707) [pdf, other]
Subjects: cs.DC (Distributed, Parallel, and Cluster Computing); cs.AI (Artificial Intelligence); cs.LG (Machine Learning)
Title: FusionLLM: A Decentralized LLM Training System on Geo-distributed GPUs with Adaptive Compression
Authors: Zhenheng Tang, Xueze Kang, Yiming Yin, Xinglin Pan, Yuxin Wang, Xin He, Qiang Wang, Rongfei Zeng, Kaiyong Zhao, Shaohuai Shi, Amelie Chi Zhou, Bo Li, Bingsheng He, Xiaowen Chu
Abstract: To alleviate hardware scarcity in training large deep neural networks (DNNs), particularly large language models (LLMs), we present FusionLLM, a decentralized training system designed and implemented for training DNNs using geo-distributed GPUs across different computing clusters or individual devices. Decentralized training faces significant challenges regarding system design and efficiency, including: 1) the need for remote automatic differentiation (RAD), 2) support for flexible model definitions and heterogeneous software, 3) heterogeneous hardware leading to low resource utilization or the straggler problem, and 4) slow network communication. To address these challenges, in the system design, we represent the model as a directed acyclic graph of operators (OP-DAG). Each node in the DAG represents an operator in the DNN, while each edge represents the data dependency between operators. Based on this design, 1) users can customize any DNN without worrying about low-level operator implementations; 2) we enable task scheduling with finer-grained sub-tasks, offering more optimization space; 3) a DAG runtime executor can implement RAD without requiring consistent low-level ML framework versions. To enhance system efficiency, we implement a workload estimator and design an OP-Fence scheduler to cluster devices with similar bandwidths together and partition the DAG to increase throughput. Additionally, we propose an AdaTopK compressor to adaptively compress intermediate activations and gradients at the slowest communication links. To evaluate the convergence and efficiency of our system and algorithms, we train ResNet-101 and GPT-2 on three real-world testbeds using 48 GPUs connected with 8 Mbps to 10 Gbps networks. Experimental results demonstrate that our system and method achieve a 1.45-9.39x speedup over baseline methods while ensuring convergence.
Submitted 16 October, 2024; originally announced October 2024.
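
The AdaTopK compressor keeps only the largest-magnitude entries of an activation or gradient tensor before it crosses a slow link. A minimal top-k sparsifier in PyTorch; the adaptive per-link ratio policy is the paper's contribution and is only stubbed here:

    import math
    import torch

    def topk_compress(t: torch.Tensor, ratio: float):
        """Keep the k = ratio * numel largest-magnitude values."""
        flat = t.flatten()
        k = max(1, int(flat.numel() * ratio))
        _, idx = torch.topk(flat.abs(), k)
        return flat[idx], idx, tuple(t.shape)

    def topk_decompress(values, idx, shape):
        out = torch.zeros(math.prod(shape), dtype=values.dtype)
        out[idx] = values
        return out.reshape(shape)

    def adaptive_ratio(bandwidth_mbps: float) -> float:
        """Hypothetical policy: compress harder on slower links."""
        return 0.01 if bandwidth_mbps < 100 else 0.1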