<!-- extraction artifact (pre-doctype residue, preserved as a comment so the doctype stays first): CINXE.COM — Search | arXiv e-print repository -->
<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script 
src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. 
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" 
role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 455 results for author: <span class="mathjax">Shi, X</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Shi%2C+X">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Shi, X"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Shi%2C+X&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option 
value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Shi, X"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Shi%2C+X&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Shi%2C+X&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Shi%2C+X&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Shi%2C+X&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Shi%2C+X&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Shi%2C+X&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">…</span></li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.16579">arXiv:2411.16579</a> <span> [<a href="https://arxiv.org/pdf/2411.16579">pdf</a>, <a 
href="https://arxiv.org/format/2411.16579">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Enhancing LLM Reasoning via Critique Models with Test-Time and Training-Time Supervision </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xi%2C+Z">Zhiheng Xi</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+D">Dingwen Yang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+J">Jixuan Huang</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+J">Jiafu Tang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+G">Guanyu Li</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+Y">Yiwen Ding</a>, <a href="/search/cs?searchtype=author&query=He%2C+W">Wei He</a>, <a href="/search/cs?searchtype=author&query=Hong%2C+B">Boyang Hong</a>, <a href="/search/cs?searchtype=author&query=Do%2C+S">Shihan Do</a>, <a href="/search/cs?searchtype=author&query=Zhan%2C+W">Wenyu Zhan</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xiao Wang</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+R">Rui Zheng</a>, <a href="/search/cs?searchtype=author&query=Ji%2C+T">Tao Ji</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xiaowei Shi</a>, <a href="/search/cs?searchtype=author&query=Zhai%2C+Y">Yitao Zhai</a>, <a href="/search/cs?searchtype=author&query=Weng%2C+R">Rongxiang Weng</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jingang Wang</a>, <a href="/search/cs?searchtype=author&query=Cai%2C+X">Xunliang Cai</a>, <a href="/search/cs?searchtype=author&query=Gui%2C+T">Tao Gui</a>, <a 
href="/search/cs?searchtype=author&query=Wu%2C+Z">Zuxuan Wu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Q">Qi Zhang</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+X">Xipeng Qiu</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+X">Xuanjing Huang</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Y">Yu-Gang Jiang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.16579v1-abstract-short" style="display: inline;"> Training large language models (LLMs) to spend more time thinking and reflection before responding is crucial for effectively solving complex reasoning tasks in fields such as science, coding, and mathematics. However, the effectiveness of mechanisms like self-reflection and self-correction depends on the model's capacity to accurately assess its own performance, which can be limited by factors su… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.16579v1-abstract-full').style.display = 'inline'; document.getElementById('2411.16579v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.16579v1-abstract-full" style="display: none;"> Training large language models (LLMs) to spend more time thinking and reflection before responding is crucial for effectively solving complex reasoning tasks in fields such as science, coding, and mathematics. However, the effectiveness of mechanisms like self-reflection and self-correction depends on the model's capacity to accurately assess its own performance, which can be limited by factors such as initial accuracy, question difficulty, and the lack of external feedback. 
In this paper, we delve into a two-player paradigm that separates the roles of reasoning and critique models, where the critique model provides step-level feedback to supervise the reasoning (actor) model during both test-time and train-time. We first propose AutoMathCritique, an automated and scalable framework for collecting critique data, resulting in a dataset of $76,321$ responses paired with step-level feedback. Fine-tuning language models with this dataset enables them to generate natural language feedback for mathematical reasoning. We demonstrate that the critique models consistently improve the actor's performance on difficult queries at test-time, especially when scaling up inference-time computation. Motivated by these findings, we introduce the critique-based supervision to the actor's self-training process, and propose a critique-in-the-loop self-improvement method. Experiments show that the method improves the actor's exploration efficiency and solution diversity, especially on challenging queries, leading to a stronger reasoning model. Lastly, we take the preliminary step to explore training self-talk reasoning models via critique supervision and showcase its potential. Our code and datasets are at \href{https://mathcritique.github.io/}{https://mathcritique.github.io/}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.16579v1-abstract-full').style.display = 'none'; document.getElementById('2411.16579v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Preprint</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.10695">arXiv:2411.10695</a> <span> [<a href="https://arxiv.org/pdf/2411.10695">pdf</a>, <a href="https://arxiv.org/format/2411.10695">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> </div> </div> <p class="title is-5 mathjax"> Series Expansion of Probability of Correct Selection for Improved Finite Budget Allocation in Ranking and Selection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xinbo Shi</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+Y">Yijie Peng</a>, <a href="/search/cs?searchtype=author&query=Tuffin%2C+B">Bruno Tuffin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.10695v1-abstract-short" style="display: inline;"> This paper addresses the challenge of improving finite sample performance in Ranking and Selection by developing a Bahadur-Rao type expansion for the Probability of Correct Selection (PCS). While traditional large deviations approximations captures PCS behavior in the asymptotic regime, they can lack precision in finite sample settings. 
Our approach enhances PCS approximation under limited simulat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10695v1-abstract-full').style.display = 'inline'; document.getElementById('2411.10695v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.10695v1-abstract-full" style="display: none;"> This paper addresses the challenge of improving finite sample performance in Ranking and Selection by developing a Bahadur-Rao type expansion for the Probability of Correct Selection (PCS). While traditional large deviations approximations captures PCS behavior in the asymptotic regime, they can lack precision in finite sample settings. Our approach enhances PCS approximation under limited simulation budgets, providing more accurate characterization of optimal sampling ratios and optimality conditions dependent of budgets. Algorithmically, we propose a novel finite budget allocation (FCBA) policy, which sequentially estimates the optimality conditions and accordingly balances the sampling ratios. We illustrate numerically on toy examples that our FCBA policy achieves superior PCS performance compared to tested traditional methods. As an extension, we note that the non-monotonic PCS behavior described in the literature for low-confidence scenarios can be attributed to the negligence of simultaneous incorrect binary comparisons in PCS approximations. We provide a refined expansion and a tailored allocation strategy to handle low-confidence scenarios, addressing the non-monotonicity issue. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10695v1-abstract-full').style.display = 'none'; document.getElementById('2411.10695v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.08599">arXiv:2411.08599</a> <span> [<a href="https://arxiv.org/pdf/2411.08599">pdf</a>, <a href="https://arxiv.org/format/2411.08599">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Databases">cs.DB</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> XiYan-SQL: A Multi-Generator Ensemble Framework for Text-to-SQL </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gao%2C+Y">Yingqi Gao</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yifu Liu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xiaoxia Li</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xiaorong Shi</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Y">Yin Zhu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yiming Wang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+S">Shiqi Li</a>, <a href="/search/cs?searchtype=author&query=Li%2C+W">Wei Li</a>, <a href="/search/cs?searchtype=author&query=Hong%2C+Y">Yuntao 
Hong</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+Z">Zhiling Luo</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+J">Jinyang Gao</a>, <a href="/search/cs?searchtype=author&query=Mou%2C+L">Liyu Mou</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yu Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.08599v1-abstract-short" style="display: inline;"> To tackle the challenges of large language model performance in natural language to SQL tasks, we introduce XiYan-SQL, an innovative framework that employs a multi-generator ensemble strategy to improve candidate generation. We introduce M-Schema, a semi-structured schema representation method designed to enhance the understanding of database structures. To enhance the quality and diversity of gen… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08599v1-abstract-full').style.display = 'inline'; document.getElementById('2411.08599v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.08599v1-abstract-full" style="display: none;"> To tackle the challenges of large language model performance in natural language to SQL tasks, we introduce XiYan-SQL, an innovative framework that employs a multi-generator ensemble strategy to improve candidate generation. We introduce M-Schema, a semi-structured schema representation method designed to enhance the understanding of database structures. To enhance the quality and diversity of generated candidate SQL queries, XiYan-SQL integrates the significant potential of in-context learning (ICL) with the precise control of supervised fine-tuning. On one hand, we propose a series of training strategies to fine-tune models to generate high-quality candidates with diverse preferences. 
On the other hand, we implement the ICL approach with an example selection method based on named entity recognition to prevent overemphasis on entities. The refiner optimizes each candidate by correcting logical or syntactical errors. To address the challenge of identifying the best candidate, we fine-tune a selection model to distinguish nuances of candidate SQL queries. The experimental results on multiple dialect datasets demonstrate the robustness of XiYan-SQL in addressing challenges across different scenarios. Overall, our proposed XiYan-SQL achieves the state-of-the-art execution accuracy of 89.65% on the Spider test set, 69.86% on SQL-Eval, 41.20% on NL2GQL, and a competitive score of 72.23% on the Bird development benchmark. The proposed framework not only enhances the quality and diversity of SQL queries but also outperforms previous methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08599v1-abstract-full').style.display = 'none'; document.getElementById('2411.08599v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.2; H.2 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.08348">arXiv:2411.08348</a> <span> [<a href="https://arxiv.org/pdf/2411.08348">pdf</a>, <a href="https://arxiv.org/format/2411.08348">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Refining Translations with LLMs: A Constraint-Aware Iterative Prompting Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+S">Shangfeng Chen</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xiayang Shi</a>, <a href="/search/cs?searchtype=author&query=Li%2C+P">Pu Li</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yinlin Li</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jingjing Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.08348v1-abstract-short" style="display: inline;"> Large language models (LLMs) have demonstrated remarkable proficiency in machine translation (MT), even without specific training on the languages in question. However, translating rare words in low-resource or domain-specific contexts remains challenging for LLMs. 
To address this issue, we propose a multi-step prompt chain that enhances translation faithfulness by prioritizing key terms crucial f… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08348v1-abstract-full').style.display = 'inline'; document.getElementById('2411.08348v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.08348v1-abstract-full" style="display: none;"> Large language models (LLMs) have demonstrated remarkable proficiency in machine translation (MT), even without specific training on the languages in question. However, translating rare words in low-resource or domain-specific contexts remains challenging for LLMs. To address this issue, we propose a multi-step prompt chain that enhances translation faithfulness by prioritizing key terms crucial for semantic accuracy. Our method first identifies these keywords and retrieves their translations from a bilingual dictionary, integrating them into the LLM's context using Retrieval-Augmented Generation (RAG). We further mitigate potential output hallucinations caused by long prompts through an iterative self-checking mechanism, where the LLM refines its translations based on lexical and semantic constraints. Experiments using Llama and Qwen as base models on the FLORES-200 and WMT datasets demonstrate significant improvements over baselines, highlighting the effectiveness of our approach in enhancing translation faithfulness and robustness, particularly in low-resource scenarios. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08348v1-abstract-full').style.display = 'none'; document.getElementById('2411.08348v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.07560">arXiv:2411.07560</a> <span> [<a href="https://arxiv.org/pdf/2411.07560">pdf</a>, <a href="https://arxiv.org/format/2411.07560">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> EUR/USD Exchange Rate Forecasting incorporating Text Mining Based on Pre-trained Language Models and Deep Learning Methods </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xiangyu Shi</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+H">Hongcheng Ding</a>, <a href="/search/cs?searchtype=author&query=Faroog%2C+S">Salaar Faroog</a>, <a href="/search/cs?searchtype=author&query=Dewi%2C+D+A">Deshinta Arrova Dewi</a>, <a href="/search/cs?searchtype=author&query=Abdullah%2C+S+N">Shamsul Nahar Abdullah</a>, <a href="/search/cs?searchtype=author&query=Malek%2C+B+A">Bahiah A Malek</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.07560v1-abstract-short" style="display: inline;"> This study introduces a novel 
approach for EUR/USD exchange rate forecasting that integrates deep learning, textual analysis, and particle swarm optimization (PSO). By incorporating online news and analysis texts as qualitative data, the proposed PSO-LSTM model demonstrates superior performance compared to traditional econometric and machine learning models. The research employs advanced text mini… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07560v1-abstract-full').style.display = 'inline'; document.getElementById('2411.07560v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.07560v1-abstract-full" style="display: none;"> This study introduces a novel approach for EUR/USD exchange rate forecasting that integrates deep learning, textual analysis, and particle swarm optimization (PSO). By incorporating online news and analysis texts as qualitative data, the proposed PSO-LSTM model demonstrates superior performance compared to traditional econometric and machine learning models. The research employs advanced text mining techniques, including sentiment analysis using the RoBERTa-Large model and topic modeling with LDA. Empirical findings underscore the significant advantage of incorporating textual data, with the PSO-LSTM model outperforming benchmark models such as SVM, SVR, ARIMA, and GARCH. Ablation experiments reveal the contribution of each textual data category to the overall forecasting performance. The study highlights the transformative potential of artificial intelligence in finance and paves the way for future research in real-time forecasting and the integration of alternative data sources. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07560v1-abstract-full').style.display = 'none'; document.getElementById('2411.07560v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.07037">arXiv:2411.07037</a> <span> [<a href="https://arxiv.org/pdf/2411.07037">pdf</a>, <a href="https://arxiv.org/format/2411.07037">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> LIFBench: Evaluating the Instruction Following Performance and Stability of Large Language Models in Long-Context Scenarios </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wu%2C+X">Xiaodong Wu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+M">Minhao Wang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yichen Liu</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xiaoming Shi</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+H">He Yan</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+X">Xiangju Lu</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+J">Junmin Zhu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+W">Wei Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.07037v1-abstract-short" style="display: inline;"> As Large Language Models (LLMs) continue to advance in natural language 
processing (NLP), their ability to stably follow instructions in long-context inputs has become crucial for real-world applications. While existing benchmarks assess various LLM capabilities, they rarely focus on instruction-following in long-context scenarios or stability on different inputs. In response, we introduce the Lon… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07037v1-abstract-full').style.display = 'inline'; document.getElementById('2411.07037v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.07037v1-abstract-full" style="display: none;"> As Large Language Models (LLMs) continue to advance in natural language processing (NLP), their ability to stably follow instructions in long-context inputs has become crucial for real-world applications. While existing benchmarks assess various LLM capabilities, they rarely focus on instruction-following in long-context scenarios or stability on different inputs. In response, we introduce the Long-context Instruction-Following Benchmark (LIFBench), a scalable dataset designed to evaluate LLMs' instruction-following capabilities and stability across long contexts. LIFBench comprises three long-context scenarios and eleven diverse tasks, supported by 2,766 instructions generated through an automated expansion method across three dimensions: length, expression, and variables. For evaluation, we propose LIFEval, a rubric-based assessment framework that provides precise, automated scoring of complex LLM responses without relying on LLM-assisted evaluations or human judgments. This approach facilitates a comprehensive analysis of model performance and stability across various perspectives. We conduct extensive experiments on 20 notable LLMs across six length intervals, analyzing their instruction-following capabilities and stability. 
Our work contributes LIFBench and LIFEval as robust tools for assessing LLM performance in complex, long-context settings, providing insights that can inform future LLM development. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07037v1-abstract-full').style.display = 'none'; document.getElementById('2411.07037v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages, 3 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.04281">arXiv:2411.04281</a> <span> [<a href="https://arxiv.org/pdf/2411.04281">pdf</a>, <a href="https://arxiv.org/format/2411.04281">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Generating Synthetic Electronic Health Record (EHR) Data: A Review with Benchmarking </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xingran Chen</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z">Zhenke Wu</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xu Shi</a>, <a href="/search/cs?searchtype=author&query=Cho%2C+H">Hyunghoon Cho</a>, <a 
href="/search/cs?searchtype=author&query=Mukherjee%2C+B">Bhramar Mukherjee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.04281v1-abstract-short" style="display: inline;"> We conduct a scoping review of existing approaches for synthetic EHR data generation, and benchmark major methods with proposed open-source software to offer recommendations for practitioners. We search three academic databases for our scoping review. Methods are benchmarked on open-source EHR datasets, MIMIC-III/IV. Seven existing methods covering major categories and two baseline methods are imp… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04281v1-abstract-full').style.display = 'inline'; document.getElementById('2411.04281v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.04281v1-abstract-full" style="display: none;"> We conduct a scoping review of existing approaches for synthetic EHR data generation, and benchmark major methods with proposed open-source software to offer recommendations for practitioners. We search three academic databases for our scoping review. Methods are benchmarked on open-source EHR datasets, MIMIC-III/IV. Seven existing methods covering major categories and two baseline methods are implemented and compared. Evaluation metrics concern data fidelity, downstream utility, privacy protection, and computational cost. 42 studies are identified and classified into five categories. Seven open-source methods covering all categories are selected, trained on MIMIC-III, and evaluated on MIMIC-III or MIMIC-IV for transportability considerations. Among them, GAN-based methods demonstrate competitive performance in fidelity and utility on MIMIC-III; rule-based methods excel in privacy protection. 
Similar findings are observed on MIMIC-IV, except that GAN-based methods further outperform the baseline methods in preserving fidelity. A Python package, "SynthEHRella", is provided to integrate various choices of approaches and evaluation metrics, enabling more streamlined exploration and evaluation of multiple methods. We found that method choice is governed by the relative importance of the evaluation metrics in downstream use cases. We provide a decision tree to guide the choice among the benchmarked methods. Based on the decision tree, GAN-based methods excel when distributional shifts exist between the training and testing populations. Otherwise, CorGAN and MedGAN are most suitable for association modeling and predictive modeling, respectively. Future research should prioritize enhancing fidelity of the synthetic data while controlling privacy exposure, and comprehensive benchmarking of longitudinal or conditional generation methods. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.03059">arXiv:2411.03059</a> <span> [<a href="https://arxiv.org/pdf/2411.03059">pdf</a>, <a href="https://arxiv.org/format/2411.03059">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Enhancing DP-SGD through Non-monotonous Adaptive Scaling Gradient Weight </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huang%2C+T">Tao Huang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Q">Qingyu Huang</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xin Shi</a>, <a href="/search/cs?searchtype=author&query=Meng%2C+J">Jiayang Meng</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+G">Guolong Zheng</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+X">Xu Yang</a>, <a href="/search/cs?searchtype=author&query=Yi%2C+X">Xun Yi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.03059v1-abstract-short" style="display: inline;"> In the domain of deep learning, the challenge of protecting sensitive data while maintaining model utility is significant. Traditional Differential Privacy (DP) techniques such as Differentially Private Stochastic Gradient Descent (DP-SGD) typically employ strategies like direct or per-sample adaptive gradient clipping. 
These methods, however, compromise model accuracy due to their critical influe… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.03059v1-abstract-full').style.display = 'inline'; document.getElementById('2411.03059v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.03059v1-abstract-full" style="display: none;"> In the domain of deep learning, the challenge of protecting sensitive data while maintaining model utility is significant. Traditional Differential Privacy (DP) techniques such as Differentially Private Stochastic Gradient Descent (DP-SGD) typically employ strategies like direct or per-sample adaptive gradient clipping. These methods, however, compromise model accuracy due to their critical influence on gradient handling, particularly neglecting the significant contribution of small gradients during later training stages. In this paper, we introduce an enhanced version of DP-SGD, named Differentially Private Per-sample Adaptive Scaling Clipping (DP-PSASC). This approach replaces traditional clipping with non-monotonous adaptive gradient scaling, which alleviates the need for intensive threshold setting and rectifies the disproportionate weighting of smaller gradients. Our contribution is twofold. First, we develop a novel gradient scaling technique that effectively assigns proper weights to gradients, particularly small ones, thus improving learning under differential privacy. Second, we integrate a momentum-based method into DP-PSASC to reduce bias from stochastic sampling, enhancing convergence rates. Our theoretical and empirical analyses confirm that DP-PSASC preserves privacy and delivers superior performance across diverse datasets, setting new standards for privacy-sensitive applications. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.03059v1-abstract-full').style.display = 'none'; document.getElementById('2411.03059v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.02653">arXiv:2411.02653</a> <span> [<a href="https://arxiv.org/pdf/2411.02653">pdf</a>, <a href="https://arxiv.org/format/2411.02653">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Earth and Planetary Astrophysics">astro-ph.EP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Instrumentation and Methods for Astrophysics">astro-ph.IM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1051/0004-6361/202451789">10.1051/0004-6361/202451789 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Deep operator neural network applied to efficient computation of asteroid surface temperature and the Yarkovsky effect </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhao%2C+S">Shunjing Zhao</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+H">Hanlun Lei</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xian Shi</a> </p> <p class="abstract mathjax"> <span 
class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.02653v1-abstract-short" style="display: inline;"> Surface temperature distribution is crucial for thermal property-based studies about irregular asteroids in our Solar System. While direct numerical simulations could model surface temperatures with high fidelity, they often take a significant amount of computational time, especially for problems where temperature distributions are required to be repeatedly calculated. To this end, deep operator n… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02653v1-abstract-full').style.display = 'inline'; document.getElementById('2411.02653v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.02653v1-abstract-full" style="display: none;"> Surface temperature distribution is crucial for thermal property-based studies about irregular asteroids in our Solar System. While direct numerical simulations could model surface temperatures with high fidelity, they often take a significant amount of computational time, especially for problems where temperature distributions are required to be repeatedly calculated. To this end, deep operator neural network (DeepONet) provides a powerful tool due to its high computational efficiency and generalization ability. In this work, we applied DeepONet to the modelling of asteroid surface temperatures. Results show that the trained network is able to predict temperature with an accuracy of ~1% on average, while the computational cost is five orders of magnitude lower, hence enabling thermal property analysis in a multidimensional parameter space. 
As a preliminary application, we analyzed the orbital evolution of asteroids through direct N-body simulations embedded with instantaneous Yarkovsky effect inferred by DeepONet-based thermophysical modelling. Taking asteroids (3200) Phaethon and (89433) 2001 WM41 as examples, we show the efficacy and efficiency of our AI-based approach. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02653v1-abstract-full').style.display = 'none'; document.getElementById('2411.02653v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">accepted for publication in "Astronomy &amp; Astrophysics"</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.02279">arXiv:2411.02279</a> <span> [<a href="https://arxiv.org/pdf/2411.02279">pdf</a>, <a href="https://arxiv.org/format/2411.02279">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> ELU-GCN: Effectively Label-Utilizing Graph Convolutional Network </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huang%2C+J">Jincheng Huang</a>, <a href="/search/cs?searchtype=author&query=Mo%2C+Y">Yujie Mo</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xiaoshuang Shi</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+L">Lei Feng</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+X">Xiaofeng Zhu</a> </p> <p 
class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.02279v1-abstract-short" style="display: inline;"> The message-passing mechanism of graph convolutional networks (i.e., GCNs) enables label information to be propagated to a broader range of neighbors, thereby increasing the utilization of labels. However, the label information is not always effectively utilized in the traditional GCN framework. To address this issue, we propose a new two-step framework called ELU-GCN. In the first stage, ELU-GCN… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02279v1-abstract-full').style.display = 'inline'; document.getElementById('2411.02279v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.02279v1-abstract-full" style="display: none;"> The message-passing mechanism of graph convolutional networks (i.e., GCNs) enables label information to be propagated to a broader range of neighbors, thereby increasing the utilization of labels. However, the label information is not always effectively utilized in the traditional GCN framework. To address this issue, we propose a new two-step framework called ELU-GCN. In the first stage, ELU-GCN conducts graph learning to learn a new graph structure (i.e., ELU-graph), which enables GCNs to effectively utilize label information. In the second stage, we design a new graph contrastive learning on the GCN framework for representation learning by exploring the consistency and mutually exclusive information between the learned ELU graph and the original graph. Moreover, we theoretically demonstrate that the proposed method can ensure the generalization ability of GCNs. Extensive experiments validate the superiority of the proposed method. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02279v1-abstract-full').style.display = 'none'; document.getElementById('2411.02279v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.01791">arXiv:2411.01791</a> <span> [<a href="https://arxiv.org/pdf/2411.01791">pdf</a>, <a href="https://arxiv.org/format/2411.01791">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Minder: Faulty Machine Detection for Large-scale Distributed Model Training </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Deng%2C+Y">Yangtao Deng</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xiang Shi</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Z">Zhuo Jiang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xingjian Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Lei Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zhang Zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+B">Bo Li</a>, <a href="/search/cs?searchtype=author&query=Song%2C+Z">Zuquan Song</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+H">Hang Zhu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+G">Gaohong Liu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+F">Fuliang Li</a>, <a 
href="/search/cs?searchtype=author&query=Wang%2C+S">Shuguang Wang</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+H">Haibin Lin</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+J">Jianxi Ye</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+M">Minlan Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.01791v1-abstract-short" style="display: inline;"> Large-scale distributed model training requires simultaneous training on up to thousands of machines. Faulty machine detection is critical when an unexpected fault occurs in a machine. From our experience, a training task can encounter two faults per day on average, possibly leading to a halt for hours. To address the drawbacks of the time-consuming and labor-intensive manual scrutiny, we propose… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.01791v1-abstract-full').style.display = 'inline'; document.getElementById('2411.01791v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.01791v1-abstract-full" style="display: none;"> Large-scale distributed model training requires simultaneous training on up to thousands of machines. Faulty machine detection is critical when an unexpected fault occurs in a machine. From our experience, a training task can encounter two faults per day on average, possibly leading to a halt for hours. To address the drawbacks of the time-consuming and labor-intensive manual scrutiny, we propose Minder, an automatic faulty machine detector for distributed training tasks. The key idea of Minder is to automatically and efficiently detect faulty distinctive monitoring metric patterns, which could last for a period before the entire training task comes to a halt. 
Minder has been deployed in our production environment for over one year, monitoring daily distributed training tasks where each involves up to thousands of machines. In our real-world fault detection scenarios, Minder can accurately and efficiently react to faults within 3.6 seconds on average, with a precision of 0.904 and F1-score of 0.893. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.01791v1-abstract-full').style.display = 'none'; document.getElementById('2411.01791v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.00984">arXiv:2411.00984</a> <span> [<a href="https://arxiv.org/pdf/2411.00984">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/GCCE56475.2022.10014364">10.1109/GCCE56475.2022.10014364 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Inter-Feature-Map Differential Coding of Surveillance Video </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Iino%2C+K">Kei Iino</a>, <a href="/search/cs?searchtype=author&query=Takahashi%2C+M">Miho Takahashi</a>, <a 
href="/search/cs?searchtype=author&query=Watanabe%2C+H">Hiroshi Watanabe</a>, <a href="/search/cs?searchtype=author&query=Morinaga%2C+I">Ichiro Morinaga</a>, <a href="/search/cs?searchtype=author&query=Enomoto%2C+S">Shohei Enomoto</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xu Shi</a>, <a href="/search/cs?searchtype=author&query=Sakamoto%2C+A">Akira Sakamoto</a>, <a href="/search/cs?searchtype=author&query=Eda%2C+T">Takeharu Eda</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.00984v1-abstract-short" style="display: inline;"> In Collaborative Intelligence, a deep neural network (DNN) is partitioned and deployed at the edge and the cloud for bandwidth saving and system optimization. When a model input is an image, it has been confirmed that the intermediate feature map, the output from the edge, can be smaller than the input data size. However, its effectiveness has not been reported when the input is a video. In this s… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.00984v1-abstract-full').style.display = 'inline'; document.getElementById('2411.00984v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.00984v1-abstract-full" style="display: none;"> In Collaborative Intelligence, a deep neural network (DNN) is partitioned and deployed at the edge and the cloud for bandwidth saving and system optimization. When a model input is an image, it has been confirmed that the intermediate feature map, the output from the edge, can be smaller than the input data size. However, its effectiveness has not been reported when the input is a video. In this study, we propose a method to compress the feature map of surveillance videos by applying inter-feature-map differential coding (IFMDC). 
IFMDC shows a compression ratio comparable to, or better than, HEVC to the input video in the case of small accuracy reduction. Our method is especially effective for videos that are sensitive to image quality degradation when HEVC is applied. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.00984v1-abstract-full').style.display = 'none'; document.getElementById('2411.00984v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">© 2022 IEEE. Personal use of this material is permitted. Permission from IEEE must be obtained for all other uses, in any current or future media, including reprinting/republishing this material for advertising or promotional purposes, creating new collective works, for resale or redistribution to servers or lists, or reuse of any copyrighted component of this work in other works</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> 2022 IEEE 11th Global Conference on Consumer Electronics (GCCE) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.00750">arXiv:2411.00750</a> <span> [<a href="https://arxiv.org/pdf/2411.00750">pdf</a>, <a href="https://arxiv.org/format/2411.00750">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span 
class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Mitigating Tail Narrowing in LLM Self-Improvement via Socratic-Guided Sampling </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ding%2C+Y">Yiwen Ding</a>, <a href="/search/cs?searchtype=author&query=Xi%2C+Z">Zhiheng Xi</a>, <a href="/search/cs?searchtype=author&query=He%2C+W">Wei He</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zhuoyuan Li</a>, <a href="/search/cs?searchtype=author&query=Zhai%2C+Y">Yitao Zhai</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xiaowei Shi</a>, <a href="/search/cs?searchtype=author&query=Cai%2C+X">Xunliang Cai</a>, <a href="/search/cs?searchtype=author&query=Gui%2C+T">Tao Gui</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Q">Qi Zhang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+X">Xuanjing Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.00750v1-abstract-short" style="display: inline;"> Self-improvement methods enable large language models (LLMs) to generate solutions themselves and iteratively train on filtered, high-quality rationales. This process proves effective and reduces the reliance on human supervision in LLMs' reasoning, but the performance soon plateaus. 
We delve into the process and find that models tend to over-sample on easy queries and under-sample on queries they… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.00750v1-abstract-full').style.display = 'inline'; document.getElementById('2411.00750v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.00750v1-abstract-full" style="display: none;"> Self-improvement methods enable large language models (LLMs) to generate solutions themselves and iteratively train on filtered, high-quality rationales. This process proves effective and reduces the reliance on human supervision in LLMs' reasoning, but the performance soon plateaus. We delve into the process and find that models tend to over-sample on easy queries and under-sample on queries they have yet to master. As iterations proceed, this imbalance in sampling is exacerbated, leading to a long-tail distribution where solutions to difficult queries almost diminish. This phenomenon limits the performance gain of self-improving models. A straightforward solution is brute-force sampling to balance the distribution, which significantly raises computational costs. In this paper, we introduce Guided Self-Improvement (GSI), a strategy aimed at improving the efficiency of sampling challenging heavy-tailed data. It leverages Socratic-style guidance signals to help LLM reasoning with complex queries, reducing the exploration effort and minimizing computational overhead. Experiments on four models across diverse mathematical tasks show that GSI strikes a balance between performance and efficiency, while also being effective on held-out tasks. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.00750v1-abstract-full').style.display = 'none'; document.getElementById('2411.00750v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Codes are publicly available at https://github.com/Yiwen-Ding/Guided-Self-Improvement</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.24164">arXiv:2410.24164</a> <span> [<a href="https://arxiv.org/pdf/2410.24164">pdf</a>, <a href="https://arxiv.org/format/2410.24164">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> $π_0$: A Vision-Language-Action Flow Model for General Robot Control </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Black%2C+K">Kevin Black</a>, <a href="/search/cs?searchtype=author&query=Brown%2C+N">Noah Brown</a>, <a href="/search/cs?searchtype=author&query=Driess%2C+D">Danny Driess</a>, <a href="/search/cs?searchtype=author&query=Esmail%2C+A">Adnan Esmail</a>, <a href="/search/cs?searchtype=author&query=Equi%2C+M">Michael Equi</a>, <a href="/search/cs?searchtype=author&query=Finn%2C+C">Chelsea Finn</a>, <a href="/search/cs?searchtype=author&query=Fusai%2C+N">Niccolo Fusai</a>, <a href="/search/cs?searchtype=author&query=Groom%2C+L">Lachy 
Groom</a>, <a href="/search/cs?searchtype=author&query=Hausman%2C+K">Karol Hausman</a>, <a href="/search/cs?searchtype=author&query=Ichter%2C+B">Brian Ichter</a>, <a href="/search/cs?searchtype=author&query=Jakubczak%2C+S">Szymon Jakubczak</a>, <a href="/search/cs?searchtype=author&query=Jones%2C+T">Tim Jones</a>, <a href="/search/cs?searchtype=author&query=Ke%2C+L">Liyiming Ke</a>, <a href="/search/cs?searchtype=author&query=Levine%2C+S">Sergey Levine</a>, <a href="/search/cs?searchtype=author&query=Li-Bell%2C+A">Adrian Li-Bell</a>, <a href="/search/cs?searchtype=author&query=Mothukuri%2C+M">Mohith Mothukuri</a>, <a href="/search/cs?searchtype=author&query=Nair%2C+S">Suraj Nair</a>, <a href="/search/cs?searchtype=author&query=Pertsch%2C+K">Karl Pertsch</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+L+X">Lucy Xiaoyang Shi</a>, <a href="/search/cs?searchtype=author&query=Tanner%2C+J">James Tanner</a>, <a href="/search/cs?searchtype=author&query=Vuong%2C+Q">Quan Vuong</a>, <a href="/search/cs?searchtype=author&query=Walling%2C+A">Anna Walling</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Haohuan Wang</a>, <a href="/search/cs?searchtype=author&query=Zhilinsky%2C+U">Ury Zhilinsky</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.24164v3-abstract-short" style="display: inline;"> Robot learning holds tremendous promise to unlock the full potential of flexible, general, and dexterous robot systems, as well as to address some of the deepest questions in artificial intelligence. However, bringing robot learning to the level of generality required for effective real-world systems faces major obstacles in terms of data, generalization, and robustness. 
In this paper, we discuss… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.24164v3-abstract-full').style.display = 'inline'; document.getElementById('2410.24164v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.24164v3-abstract-full" style="display: none;"> Robot learning holds tremendous promise to unlock the full potential of flexible, general, and dexterous robot systems, as well as to address some of the deepest questions in artificial intelligence. However, bringing robot learning to the level of generality required for effective real-world systems faces major obstacles in terms of data, generalization, and robustness. In this paper, we discuss how generalist robot policies (i.e., robot foundation models) can address these challenges, and how we can design effective generalist robot policies for complex and highly dexterous tasks. We propose a novel flow matching architecture built on top of a pre-trained vision-language model (VLM) to inherit Internet-scale semantic knowledge. We then discuss how this model can be trained on a large and diverse dataset from multiple dexterous robot platforms, including single-arm robots, dual-arm robots, and mobile manipulators. We evaluate our model in terms of its ability to perform tasks in zero shot after pre-training, follow language instructions from people and from a high-level VLM policy, and its ability to acquire new skills via fine-tuning. Our results cover a wide variety of tasks, such as laundry folding, table cleaning, and assembling boxes. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.24164v3-abstract-full').style.display = 'none'; document.getElementById('2410.24164v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 31 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">See project website for videos: https://physicalintelligence.company/blog/pi0</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.21533">arXiv:2410.21533</a> <span> [<a href="https://arxiv.org/pdf/2410.21533">pdf</a>, <a href="https://arxiv.org/format/2410.21533">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> L3Ms -- Lagrange Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Dhillon%2C+G+S">Guneet S. 
Dhillon</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xingjian Shi</a>, <a href="/search/cs?searchtype=author&query=Teh%2C+Y+W">Yee Whye Teh</a>, <a href="/search/cs?searchtype=author&query=Smola%2C+A">Alex Smola</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.21533v1-abstract-short" style="display: inline;"> Supervised fine-tuning (SFT) and alignment of large language models (LLMs) are key steps in providing a good user experience. However, the concept of an appropriate alignment is inherently application-dependent, and current methods often rely on heuristic choices to drive the optimization. In this work, we formulate SFT and alignment as a constrained optimization problem, where the LLM is trained… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.21533v1-abstract-full').style.display = 'inline'; document.getElementById('2410.21533v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.21533v1-abstract-full" style="display: none;"> Supervised fine-tuning (SFT) and alignment of large language models (LLMs) are key steps in providing a good user experience. However, the concept of an appropriate alignment is inherently application-dependent, and current methods often rely on heuristic choices to drive the optimization. In this work, we formulate SFT and alignment as a constrained optimization problem, where the LLM is trained on a task while being required to meet application-specific requirements, without resorting to heuristics. To solve this, we propose Lagrange Large Language Models (L3Ms), which employ logarithmic barriers to enforce the constraints. This approach allows for the customization of L3Ms across diverse applications while avoiding heuristic-driven processes. 
We demonstrate experimentally the versatility and efficacy of L3Ms in achieving tailored alignments for various applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.21533v1-abstract-full').style.display = 'none'; document.getElementById('2410.21533v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.21083">arXiv:2410.21083</a> <span> [<a href="https://arxiv.org/pdf/2410.21083">pdf</a>, <a href="https://arxiv.org/format/2410.21083">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Stealthy Jailbreak Attacks on Large Language Models via Benign Data Mirroring </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mu%2C+H">Honglin Mu</a>, <a href="/search/cs?searchtype=author&query=He%2C+H">Han He</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Y">Yuxin Zhou</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+Y">Yunlong Feng</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Y">Yang Xu</a>, <a href="/search/cs?searchtype=author&query=Qin%2C+L">Libo Qin</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xiaoming Shi</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zeming Liu</a>, <a href="/search/cs?searchtype=author&query=Han%2C+X">Xudong Han</a>, <a 
href="/search/cs?searchtype=author&query=Shi%2C+Q">Qi Shi</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Q">Qingfu Zhu</a>, <a href="/search/cs?searchtype=author&query=Che%2C+W">Wanxiang Che</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.21083v1-abstract-short" style="display: inline;"> Large language model (LLM) safety is a critical issue, with numerous studies employing red team testing to enhance model security. Among these, jailbreak methods explore potential vulnerabilities by crafting malicious prompts that induce model outputs contrary to safety alignments. Existing black-box jailbreak methods often rely on model feedback, repeatedly submitting queries with detectable mali… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.21083v1-abstract-full').style.display = 'inline'; document.getElementById('2410.21083v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.21083v1-abstract-full" style="display: none;"> Large language model (LLM) safety is a critical issue, with numerous studies employing red team testing to enhance model security. Among these, jailbreak methods explore potential vulnerabilities by crafting malicious prompts that induce model outputs contrary to safety alignments. Existing black-box jailbreak methods often rely on model feedback, repeatedly submitting queries with detectable malicious instructions during the attack search process. Although these approaches are effective, the attacks may be intercepted by content moderators during the search process. We propose an improved transfer attack method that guides malicious prompt construction by locally training a mirror model of the target black-box model through benign data distillation. 
This method offers enhanced stealth, as it does not involve submitting identifiable malicious instructions to the target model during the search phase. Our approach achieved a maximum attack success rate of 92%, or a balanced value of 80% with an average of 1.5 detectable jailbreak queries per sample against GPT-3.5 Turbo on a subset of AdvBench. These results underscore the need for more robust defense mechanisms. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.21083v1-abstract-full').style.display = 'none'; document.getElementById('2410.21083v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.20451">arXiv:2410.20451</a> <span> [<a href="https://arxiv.org/pdf/2410.20451">pdf</a>, <a href="https://arxiv.org/format/2410.20451">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> BlinkVision: A Benchmark for Optical Flow, Scene Flow and Point Tracking Estimation using RGB Frames and Events </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yijin Li</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+Y">Yichen Shen</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Z">Zhaoyang Huang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+S">Shuo Chen</a>, <a href="/search/cs?searchtype=author&query=Bian%2C+W">Weikang Bian</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xiaoyu Shi</a>, <a 
href="/search/cs?searchtype=author&query=Wang%2C+F">Fu-Yun Wang</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+K">Keqiang Sun</a>, <a href="/search/cs?searchtype=author&query=Bao%2C+H">Hujun Bao</a>, <a href="/search/cs?searchtype=author&query=Cui%2C+Z">Zhaopeng Cui</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+G">Guofeng Zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+H">Hongsheng Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.20451v1-abstract-short" style="display: inline;"> Recent advances in event-based vision suggest that these systems complement traditional cameras by providing continuous observation without frame rate limitations and a high dynamic range, making them well-suited for correspondence tasks such as optical flow and point tracking. However, there is still a lack of comprehensive benchmarks for correspondence tasks that include both event data and imag… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.20451v1-abstract-full').style.display = 'inline'; document.getElementById('2410.20451v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.20451v1-abstract-full" style="display: none;"> Recent advances in event-based vision suggest that these systems complement traditional cameras by providing continuous observation without frame rate limitations and a high dynamic range, making them well-suited for correspondence tasks such as optical flow and point tracking. However, there is still a lack of comprehensive benchmarks for correspondence tasks that include both event data and images. To address this gap, we propose BlinkVision, a large-scale and diverse benchmark with multiple modalities and dense correspondence annotations. 
BlinkVision offers several valuable features: 1) Rich modalities: It includes both event data and RGB images. 2) Extensive annotations: It provides dense per-pixel annotations covering optical flow, scene flow, and point tracking. 3) Large vocabulary: It contains 410 everyday categories, sharing common classes with popular 2D and 3D datasets like LVIS and ShapeNet. 4) Naturalistic: It delivers photorealistic data and covers various naturalistic factors, such as camera shake and deformation. BlinkVision enables extensive benchmarks on three types of correspondence tasks (optical flow, point tracking, and scene flow estimation) for both image-based and event-based methods, offering new observations, practices, and insights for future research. The benchmark website is https://www.blinkvision.net/. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.20451v1-abstract-full').style.display = 'none'; document.getElementById('2410.20451v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to ECCV 2024. 
Project Page: https://www.blinkvision.net/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.18693">arXiv:2410.18693</a> <span> [<a href="https://arxiv.org/pdf/2410.18693">pdf</a>, <a href="https://arxiv.org/format/2410.18693">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Unleashing Reasoning Capability of LLMs via Scalable Question Synthesis from Scratch </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ding%2C+Y">Yuyang Ding</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xinyu Shi</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+X">Xiaobo Liang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Juntao Li</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Q">Qiaoming Zhu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+M">Min Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.18693v1-abstract-short" style="display: inline;"> The availability of high-quality data is one of the most important factors in improving the reasoning capability of LLMs. Existing works have demonstrated the effectiveness of creating more instruction data from seed questions or knowledge bases. Recent research indicates that continually scaling up data synthesis from strong models (e.g., GPT-4) can further elicit reasoning performance. 
Though pr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18693v1-abstract-full').style.display = 'inline'; document.getElementById('2410.18693v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.18693v1-abstract-full" style="display: none;"> The availability of high-quality data is one of the most important factors in improving the reasoning capability of LLMs. Existing works have demonstrated the effectiveness of creating more instruction data from seed questions or knowledge bases. Recent research indicates that continually scaling up data synthesis from strong models (e.g., GPT-4) can further elicit reasoning performance. Though promising, the open-sourced community still lacks high-quality data at scale and scalable data synthesis methods with affordable costs. To address this, we introduce ScaleQuest, a scalable and novel data synthesis method that utilizes "small-size" (e.g., 7B) open-source models to generate questions from scratch without the need for seed data with complex augmentation constraints. With the efficient ScaleQuest, we automatically constructed a mathematical reasoning dataset consisting of 1 million problem-solution pairs, which are more effective than existing open-sourced datasets. It can universally increase the performance of mainstream open-source models (i.e., Mistral, Llama3, DeepSeekMath, and Qwen2-Math) by achieving 29.2% to 46.4% gains on MATH. Notably, simply fine-tuning the Qwen2-Math-7B-Base model with our dataset can even surpass Qwen2-Math-7B-Instruct, a strong and well-aligned model on closed-source data, and proprietary models such as GPT-4-Turbo and Claude-3.5 Sonnet. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18693v1-abstract-full').style.display = 'none'; document.getElementById('2410.18693v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Preprint. Project page: https://scalequest.github.io/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.18505">arXiv:2410.18505</a> <span> [<a href="https://arxiv.org/pdf/2410.18505">pdf</a>, <a href="https://arxiv.org/format/2410.18505">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> CCI3.0-HQ: a large-scale Chinese dataset of high quality designed for pre-training large language models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+L">Liangdong Wang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+B">Bo-Wen Zhang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+C">Chengwei Wu</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+H">Hanyu Zhao</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xiaofeng Shi</a>, <a href="/search/cs?searchtype=author&query=Gu%2C+S">Shuhao Gu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Jijie Li</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+Q">Quanyue Ma</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+T">TengFei Pan</a>, <a 
href="/search/cs?searchtype=author&query=Liu%2C+G">Guang Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.18505v2-abstract-short" style="display: inline;"> We present CCI3.0-HQ (https://huggingface.co/datasets/BAAI/CCI3-HQ), a high-quality 500GB subset of the Chinese Corpora Internet 3.0 (CCI3.0)(https://huggingface.co/datasets/BAAI/CCI3-Data), developed using a novel two-stage hybrid filtering pipeline that significantly enhances data quality. To evaluate its effectiveness, we trained a 0.5B parameter model from scratch on 100B tokens across various… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18505v2-abstract-full').style.display = 'inline'; document.getElementById('2410.18505v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.18505v2-abstract-full" style="display: none;"> We present CCI3.0-HQ (https://huggingface.co/datasets/BAAI/CCI3-HQ), a high-quality 500GB subset of the Chinese Corpora Internet 3.0 (CCI3.0)(https://huggingface.co/datasets/BAAI/CCI3-Data), developed using a novel two-stage hybrid filtering pipeline that significantly enhances data quality. To evaluate its effectiveness, we trained a 0.5B parameter model from scratch on 100B tokens across various datasets, achieving superior performance on 10 benchmarks in a zero-shot setting compared to CCI3.0, SkyPile, and WanjuanV1. The high-quality filtering process effectively distills the capabilities of the Qwen2-72B-instruct model into a compact 0.5B model, attaining optimal F1 scores for Chinese web data classification. We believe this open-access dataset will facilitate broader access to high-quality language models. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18505v2-abstract-full').style.display = 'none'; document.getElementById('2410.18505v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.16032">arXiv:2410.16032</a> <span> [<a href="https://arxiv.org/pdf/2410.16032">pdf</a>, <a href="https://arxiv.org/format/2410.16032">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> TimeMixer++: A General Time Series Pattern Machine for Universal Predictive Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shiyu Wang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Jiawei Li</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xiaoming Shi</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+Z">Zhou Ye</a>, <a href="/search/cs?searchtype=author&query=Mo%2C+B">Baichuan Mo</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+W">Wenze Lin</a>, <a href="/search/cs?searchtype=author&query=Ju%2C+S">Shengtong Ju</a>, <a href="/search/cs?searchtype=author&query=Chu%2C+Z">Zhixuan Chu</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+M">Ming Jin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.16032v1-abstract-short" style="display: inline;"> Time series analysis plays a critical role in numerous applications, supporting tasks such as forecasting, classification, anomaly detection, and imputation. In this work, we present the time series pattern machine (TSPM), a model designed to excel in a broad range of time series tasks through powerful representation and pattern extraction capabilities. Traditional time series models often struggl… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.16032v1-abstract-full').style.display = 'inline'; document.getElementById('2410.16032v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.16032v1-abstract-full" style="display: none;"> Time series analysis plays a critical role in numerous applications, supporting tasks such as forecasting, classification, anomaly detection, and imputation. In this work, we present the time series pattern machine (TSPM), a model designed to excel in a broad range of time series tasks through powerful representation and pattern extraction capabilities. Traditional time series models often struggle to capture universal patterns, limiting their effectiveness across diverse tasks. To address this, we define multiple scales in the time domain and various resolutions in the frequency domain, employing various mixing strategies to extract intricate, task-adaptive time series patterns. Specifically, we introduce a general-purpose TSPM that processes multi-scale time series using (1) multi-resolution time imaging (MRTI), (2) time image decomposition (TID), (3) multi-scale mixing (MCM), and (4) multi-resolution mixing (MRM) to extract comprehensive temporal patterns. 
MRTI transforms multi-scale time series into multi-resolution time images, capturing patterns across both temporal and frequency domains. TID leverages dual-axis attention to extract seasonal and trend patterns, while MCM hierarchically aggregates these patterns across scales. MRM adaptively integrates all representations across resolutions. This method achieves state-of-the-art performance across 8 time series analytical tasks, consistently surpassing both general-purpose and task-specific models. Our work marks a promising step toward the next generation of TSPMs, paving the way for further advancements in time series analysis. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.16032v1-abstract-full').style.display = 'none'; document.getElementById('2410.16032v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.12856">arXiv:2410.12856</a> <span> [<a href="https://arxiv.org/pdf/2410.12856">pdf</a>, <a href="https://arxiv.org/format/2410.12856">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Optimized Biomedical Question-Answering Services with LLM and Multi-BERT Integration </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Qian%2C+C">Cheng Qian</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xianglong Shi</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+S">Shanshan Yao</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yichen Liu</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+F">Fengming Zhou</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zishu Zhang</a>, <a href="/search/cs?searchtype=author&query=Akram%2C+J">Junaid Akram</a>, <a href="/search/cs?searchtype=author&query=Braytee%2C+A">Ali Braytee</a>, <a href="/search/cs?searchtype=author&query=Anaissi%2C+A">Ali Anaissi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.12856v1-abstract-short" style="display: inline;"> We present a refined approach to biomedical question-answering (QA) services by integrating large language models (LLMs) with Multi-BERT configurations. By enhancing the ability to process and prioritize vast amounts of complex biomedical data, this system aims to support healthcare professionals in delivering better patient outcomes and informed decision-making. 
Through innovative use of BERT and… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.12856v1-abstract-full').style.display = 'inline'; document.getElementById('2410.12856v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.12856v1-abstract-full" style="display: none;"> We present a refined approach to biomedical question-answering (QA) services by integrating large language models (LLMs) with Multi-BERT configurations. By enhancing the ability to process and prioritize vast amounts of complex biomedical data, this system aims to support healthcare professionals in delivering better patient outcomes and informed decision-making. Through innovative use of BERT and BioBERT models, combined with a multi-layer perceptron (MLP) layer, we enable more specialized and efficient responses to the growing demands of the healthcare sector. Our approach not only addresses the challenge of overfitting by freezing one BERT model while training another but also improves the overall adaptability of QA services. The use of extensive datasets, such as BioASQ and BioMRC, demonstrates the system's ability to synthesize critical information. This work highlights how advanced language models can make a tangible difference in healthcare, providing reliable and responsive tools for professionals to manage complex information, ultimately serving the broader goal of improved care and data-driven insights. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.12856v1-abstract-full').style.display = 'none'; document.getElementById('2410.12856v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 12 figures, accepted and to be published in the proceedings of 2024 IEEE International Conference on Data Mining Workshops (ICDMW)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.11845">arXiv:2410.11845</a> <span> [<a href="https://arxiv.org/pdf/2410.11845">pdf</a>, <a href="https://arxiv.org/ps/2410.11845">ps</a>, <a href="https://arxiv.org/format/2410.11845">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> A Review on Edge Large Language Models: Design, Execution, and Applications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zheng%2C+Y">Yue Zheng</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yuhao Chen</a>, <a href="/search/cs?searchtype=author&query=Qian%2C+B">Bin Qian</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xiufang Shi</a>, <a href="/search/cs?searchtype=author&query=Shu%2C+Y">Yuanchao Shu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+J">Jiming Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.11845v1-abstract-short" style="display: inline;"> Large language models (LLMs) have revolutionized natural language processing with their exceptional capabilities. However, deploying LLMs on resource-constrained edge devices presents significant challenges due to computational limitations, memory constraints, and edge hardware heterogeneity. 
This survey summarizes recent developments in edge LLMs across their lifecycle, examining resource-efficie… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.11845v1-abstract-full').style.display = 'inline'; document.getElementById('2410.11845v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.11845v1-abstract-full" style="display: none;"> Large language models (LLMs) have revolutionized natural language processing with their exceptional capabilities. However, deploying LLMs on resource-constrained edge devices presents significant challenges due to computational limitations, memory constraints, and edge hardware heterogeneity. This survey summarizes recent developments in edge LLMs across their lifecycle, examining resource-efficient designs from pre-deployment techniques to runtime optimizations. Additionally, it explores on-device LLM applications in personal, enterprise, and industrial scenarios. By synthesizing advancements and identifying future directions, this survey aims to provide a comprehensive understanding of state-of-the-art methods for deploying LLMs on edge devices, bridging the gap between their immense potential and edge computing limitations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.11845v1-abstract-full').style.display = 'none'; document.getElementById('2410.11845v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.11533">arXiv:2410.11533</a> <span> </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Multi-round jailbreak attack on large language models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhou%2C+Y">Yihua Zhou</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xiaochuan Shi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.11533v2-abstract-short" style="display: inline;"> Ensuring the safety and alignment of large language models (LLMs) with human values is crucial for generating responses that are beneficial to humanity. While LLMs have the capability to identify and avoid harmful queries, they remain vulnerable to "jailbreak" attacks, where carefully crafted prompts can induce the generation of toxic content. Traditional single-round jailbreak attacks, such as GC… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.11533v2-abstract-full').style.display = 'inline'; document.getElementById('2410.11533v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.11533v2-abstract-full" style="display: none;"> Ensuring the safety and alignment of large language models (LLMs) with human values is crucial for generating responses that are beneficial to humanity. 
While LLMs have the capability to identify and avoid harmful queries, they remain vulnerable to "jailbreak" attacks, where carefully crafted prompts can induce the generation of toxic content. Traditional single-round jailbreak attacks, such as GCG and AutoDAN, do not alter the sensitive words in the dangerous prompts. Although they can temporarily bypass the model's safeguards through prompt engineering, their success rate drops significantly as the LLM is further fine-tuned, and they cannot effectively circumvent static rule-based filters that remove the hazardous vocabulary. In this study, to better understand jailbreak attacks, we introduce a multi-round jailbreak approach. This method can rewrite the dangerous prompts, decomposing them into a series of less harmful sub-questions to bypass the LLM's safety checks. We first use the LLM to perform a decomposition task, breaking down a set of natural language questions into a sequence of progressive sub-questions, which are then used to fine-tune the Llama3-8B model, enabling it to decompose hazardous prompts. The fine-tuned model is then used to break down the problematic prompt, and the resulting sub-questions are sequentially asked to the victim model. If the victim model rejects a sub-question, a new decomposition is generated, and the process is repeated until the final objective is achieved. Our experimental results show a 94\% success rate on the llama2-7B and demonstrate the effectiveness of this approach in circumventing static rule-based filters. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.11533v2-abstract-full').style.display = 'none'; document.getElementById('2410.11533v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">It is not fully completed</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.09342">arXiv:2410.09342</a> <span> [<a href="https://arxiv.org/pdf/2410.09342">pdf</a>, <a href="https://arxiv.org/format/2410.09342">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> LLM$\times$MapReduce: Simplified Long-Sequence Processing using Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhou%2C+Z">Zihan Zhou</a>, <a href="/search/cs?searchtype=author&query=Li%2C+C">Chong Li</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xinyi Chen</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shuo Wang</a>, <a href="/search/cs?searchtype=author&query=Chao%2C+Y">Yu Chao</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zhili Li</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Haoyu Wang</a>, <a href="/search/cs?searchtype=author&query=An%2C+R">Rongqiao An</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+Q">Qi Shi</a>, 
<a href="/search/cs?searchtype=author&query=Tan%2C+Z">Zhixing Tan</a>, <a href="/search/cs?searchtype=author&query=Han%2C+X">Xu Han</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xiaodong Shi</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zhiyuan Liu</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+M">Maosong Sun</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.09342v1-abstract-short" style="display: inline;"> Enlarging the context window of large language models (LLMs) has become a crucial research area, particularly for applications involving extremely long texts. In this work, we propose a novel training-free framework for processing long texts, utilizing a divide-and-conquer strategy to achieve comprehensive document understanding. The proposed LLM$\times$MapReduce framework splits the entire docume… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.09342v1-abstract-full').style.display = 'inline'; document.getElementById('2410.09342v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.09342v1-abstract-full" style="display: none;"> Enlarging the context window of large language models (LLMs) has become a crucial research area, particularly for applications involving extremely long texts. In this work, we propose a novel training-free framework for processing long texts, utilizing a divide-and-conquer strategy to achieve comprehensive document understanding. The proposed LLM$\times$MapReduce framework splits the entire document into several chunks for LLMs to read and then aggregates the intermediate answers to produce the final output. 
The main challenge for divide-and-conquer long text processing frameworks lies in the risk of losing essential long-range information when splitting the document, which can lead the model to produce incomplete or incorrect answers based on the segmented texts. Disrupted long-range information can be classified into two categories: inter-chunk dependency and inter-chunk conflict. We design a structured information protocol to better cope with inter-chunk dependency and an in-context confidence calibration mechanism to resolve inter-chunk conflicts. Experimental results demonstrate that LLM$\times$MapReduce can outperform representative open-source and commercial long-context LLMs, and is applicable to several different models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.09342v1-abstract-full').style.display = 'none'; document.getElementById('2410.09342v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Work in Progress. 
Code: https://github.com/thunlp/LLMxMapReduce</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.07711">arXiv:2410.07711</a> <span> [<a href="https://arxiv.org/pdf/2410.07711">pdf</a>, <a href="https://arxiv.org/format/2410.07711">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Rethinking the Principle of Gradient Smooth Methods in Model Explanation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhou%2C+L">Linjiang Zhou</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+C">Chao Ma</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zepeng Wang</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xiaochuan Shi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.07711v1-abstract-short" style="display: inline;"> Gradient Smoothing is an efficient approach to reducing noise in gradient-based model explanation method. SmoothGrad adds Gaussian noise to mitigate much of these noise. However, the crucial hyper-parameter in this method, the variance $\sigma$ of Gaussian noise, is set manually or with heuristic approach. However, it results in the smoothed gradients still containing a certain amount of noise. 
In this… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.07711v1-abstract-full').style.display = 'inline'; document.getElementById('2410.07711v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.07711v1-abstract-full" style="display: none;"> Gradient Smoothing is an efficient approach to reducing noise in gradient-based model explanation method. SmoothGrad adds Gaussian noise to mitigate much of these noise. However, the crucial hyper-parameter in this method, the variance $\sigma$ of Gaussian noise, is set manually or with heuristic approach. However, it results in the smoothed gradients still containing a certain amount of noise. In this paper, we aim to interpret SmoothGrad as a corollary of convolution, thereby re-understanding the gradient noise and the role of $\sigma$ from the perspective of confidence level. Furthermore, we propose an adaptive gradient smoothing method, AdaptGrad, based on these insights. Through comprehensive experiments, both qualitative and quantitative results demonstrate that AdaptGrad could effectively reduce almost all the noise in vanilla gradients compared with baselines methods. AdaptGrad is simple and universal, making it applicable for enhancing gradient-based interpretability methods for better visualization. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.07711v1-abstract-full').style.display = 'none'; document.getElementById('2410.07711v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.19585">arXiv:2409.19585</a> <span> [<a href="https://arxiv.org/pdf/2409.19585">pdf</a>, <a href="https://arxiv.org/format/2409.19585">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Two-stage Framework for Robust Speech Emotion Recognition Using Target Speaker Extraction in Human Speech Noise Conditions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mi%2C+J">Jinyi Mi</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xiaohan Shi</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+D">Ding Ma</a>, <a href="/search/cs?searchtype=author&query=He%2C+J">Jiajun He</a>, <a href="/search/cs?searchtype=author&query=Fujimura%2C+T">Takuya Fujimura</a>, <a href="/search/cs?searchtype=author&query=Toda%2C+T">Tomoki Toda</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.19585v1-abstract-short" style="display: inline;"> Developing a robust speech emotion recognition (SER) system in noisy conditions faces challenges posed by different noise properties. Most previous studies have not considered the impact of human speech noise, thus limiting the application scope of SER. In this paper, we propose a novel two-stage framework for the problem by cascading target speaker extraction (TSE) method and SER. 
We first train… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.19585v1-abstract-full').style.display = 'inline'; document.getElementById('2409.19585v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.19585v1-abstract-full" style="display: none;"> Developing a robust speech emotion recognition (SER) system in noisy conditions faces challenges posed by different noise properties. Most previous studies have not considered the impact of human speech noise, thus limiting the application scope of SER. In this paper, we propose a novel two-stage framework for the problem by cascading target speaker extraction (TSE) method and SER. We first train a TSE model to extract the speech of target speaker from a mixture. Then, in the second stage, we utilize the extracted speech for SER training. Additionally, we explore a joint training of TSE and SER models in the second stage. Our developed system achieves a 14.33% improvement in unweighted accuracy (UA) compared to a baseline without using TSE method, demonstrating the effectiveness of our framework in mitigating the impact of human speech noise. Moreover, we conduct experiments considering speaker gender, showing that our framework performs particularly well in different-gender mixture. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.19585v1-abstract-full').style.display = 'none'; document.getElementById('2409.19585v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to APSIPA ASC 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.17589">arXiv:2409.17589</a> <span> [<a href="https://arxiv.org/pdf/2409.17589">pdf</a>, <a href="https://arxiv.org/format/2409.17589">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Improving Fast Adversarial Training via Self-Knowledge Guidance </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jiang%2C+C">Chengze Jiang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Junkai Wang</a>, <a href="/search/cs?searchtype=author&query=Dong%2C+M">Minjing Dong</a>, <a href="/search/cs?searchtype=author&query=Gui%2C+J">Jie Gui</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xinli Shi</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+Y">Yuan Cao</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+Y+Y">Yuan Yan Tang</a>, <a href="/search/cs?searchtype=author&query=Kwok%2C+J+T">James Tin-Yau Kwok</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.17589v1-abstract-short" style="display: inline;"> Adversarial training has achieved remarkable advancements in defending against adversarial attacks. Among them, fast adversarial training (FAT) is gaining attention for its ability to achieve competitive robustness with fewer computing resources. 
Existing FAT methods typically employ a uniform strategy that optimizes all training data equally without considering the influence of different examples… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.17589v1-abstract-full').style.display = 'inline'; document.getElementById('2409.17589v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.17589v1-abstract-full" style="display: none;"> Adversarial training has achieved remarkable advancements in defending against adversarial attacks. Among them, fast adversarial training (FAT) is gaining attention for its ability to achieve competitive robustness with fewer computing resources. Existing FAT methods typically employ a uniform strategy that optimizes all training data equally without considering the influence of different examples, which leads to an imbalanced optimization. However, this imbalance remains unexplored in the field of FAT. In this paper, we conduct a comprehensive study of the imbalance issue in FAT and observe an obvious class disparity regarding their performances. This disparity could be embodied from a perspective of alignment between clean and robust accuracy. Based on the analysis, we mainly attribute the observed misalignment and disparity to the imbalanced optimization in FAT, which motivates us to optimize different training data adaptively to enhance robustness. Specifically, we take disparity and misalignment into consideration. First, we introduce self-knowledge guided regularization, which assigns differentiated regularization weights to each class based on its training state, alleviating class disparity. Additionally, we propose self-knowledge guided label relaxation, which adjusts label relaxation according to the training accuracy, alleviating the misalignment and improving robustness. 
By combining these methods, we formulate the Self-Knowledge Guided FAT (SKG-FAT), leveraging naturally generated knowledge during training to enhance the adversarial robustness without compromising training efficiency. Extensive experiments on four standard datasets demonstrate that the SKG-FAT improves the robustness and preserves competitive clean accuracy, outperforming the state-of-the-art methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.17589v1-abstract-full').style.display = 'none'; document.getElementById('2409.17589v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.17517">arXiv:2409.17517</a> <span> [<a href="https://arxiv.org/pdf/2409.17517">pdf</a>, <a href="https://arxiv.org/format/2409.17517">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Dataset Distillation-based Hybrid Federated Learning on Non-IID Data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xiufang Shi</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+W">Wei Zhang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+M">Mincheng Wu</a>, <a 
href="/search/cs?searchtype=author&query=Liu%2C+G">Guangyi Liu</a>, <a href="/search/cs?searchtype=author&query=Wen%2C+Z">Zhenyu Wen</a>, <a href="/search/cs?searchtype=author&query=He%2C+S">Shibo He</a>, <a href="/search/cs?searchtype=author&query=Shah%2C+T">Tejal Shah</a>, <a href="/search/cs?searchtype=author&query=Ranjan%2C+R">Rajiv Ranjan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.17517v1-abstract-short" style="display: inline;"> In federated learning, the heterogeneity of client data has a great impact on the performance of model training. Many heterogeneity issues in this process are raised by non-independently and identically distributed (Non-IID) data. This study focuses on the issue of label distribution skew. To address it, we propose a hybrid federated learning framework called HFLDD, which integrates dataset distil… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.17517v1-abstract-full').style.display = 'inline'; document.getElementById('2409.17517v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.17517v1-abstract-full" style="display: none;"> In federated learning, the heterogeneity of client data has a great impact on the performance of model training. Many heterogeneity issues in this process are raised by non-independently and identically distributed (Non-IID) data. This study focuses on the issue of label distribution skew. To address it, we propose a hybrid federated learning framework called HFLDD, which integrates dataset distillation to generate approximately independent and equally distributed (IID) data, thereby improving the performance of model training. 
Particularly, we partition the clients into heterogeneous clusters, where the data labels among different clients within a cluster are unbalanced while the data labels among different clusters are balanced. The cluster headers collect distilled data from the corresponding cluster members, and conduct model training in collaboration with the server. This training process is like traditional federated learning on IID data, and hence effectively alleviates the impact of Non-IID data on model training. Furthermore, we compare our proposed method with typical baseline methods on public datasets. Experimental results demonstrate that when the data labels are severely imbalanced, the proposed HFLDD outperforms the baseline methods in terms of both test accuracy and communication cost. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.17517v1-abstract-full').style.display = 'none'; document.getElementById('2409.17517v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.16040">arXiv:2409.16040</a> <span> [<a href="https://arxiv.org/pdf/2409.16040">pdf</a>, <a href="https://arxiv.org/format/2409.16040">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Time-MoE: Billion-Scale Time Series Foundation Models with Mixture of Experts </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xiaoming Shi</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shiyu Wang</a>, <a href="/search/cs?searchtype=author&query=Nie%2C+Y">Yuqi Nie</a>, <a href="/search/cs?searchtype=author&query=Li%2C+D">Dianqi Li</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+Z">Zhou Ye</a>, <a href="/search/cs?searchtype=author&query=Wen%2C+Q">Qingsong Wen</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+M">Ming Jin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.16040v2-abstract-short" style="display: inline;"> Deep learning for time series forecasting has seen significant advancements over the past decades. However, despite the success of large-scale pre-training in language and vision domains, pre-trained time series models remain limited in scale and operate at a high cost, hindering the development of larger capable forecasting models in real-world applications. 
In response, we introduce Time-MoE, a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.16040v2-abstract-full').style.display = 'inline'; document.getElementById('2409.16040v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.16040v2-abstract-full" style="display: none;"> Deep learning for time series forecasting has seen significant advancements over the past decades. However, despite the success of large-scale pre-training in language and vision domains, pre-trained time series models remain limited in scale and operate at a high cost, hindering the development of larger capable forecasting models in real-world applications. In response, we introduce Time-MoE, a scalable and unified architecture designed to pre-train larger, more capable forecasting foundation models while reducing inference costs. By leveraging a sparse mixture-of-experts (MoE) design, Time-MoE enhances computational efficiency by activating only a subset of networks for each prediction, reducing computational load while maintaining high model capacity. This allows Time-MoE to scale effectively without a corresponding increase in inference costs. Time-MoE comprises a family of decoder-only transformer models that operate in an auto-regressive manner and support flexible forecasting horizons with varying input context lengths. We pre-trained these models on our newly introduced large-scale data Time-300B, which spans over 9 domains and encompassing over 300 billion time points. For the first time, we scaled a time series foundation model up to 2.4 billion parameters, achieving significantly improved forecasting precision. Our results validate the applicability of scaling laws for training tokens and model size in the context of time series forecasting. 
Compared to dense models with the same number of activated parameters or equivalent computation budgets, our models consistently outperform them by large margin. These advancements position Time-MoE as a state-of-the-art solution for tackling real-world time series forecasting challenges with superior capability, efficiency, and flexibility. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.16040v2-abstract-full').style.display = 'none'; document.getElementById('2409.16040v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">30 pages, 10 figures, 13 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.15525">arXiv:2409.15525</a> <span> [<a href="https://arxiv.org/pdf/2409.15525">pdf</a>, <a href="https://arxiv.org/format/2409.15525">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Speech2rtMRI: Speech-Guided Diffusion Model for Real-time MRI Video of the Vocal 
Tract during Speech </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Nguyen%2C+H">Hong Nguyen</a>, <a href="/search/cs?searchtype=author&query=Foley%2C+S">Sean Foley</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+K">Kevin Huang</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xuan Shi</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+T">Tiantian Feng</a>, <a href="/search/cs?searchtype=author&query=Narayanan%2C+S">Shrikanth Narayanan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.15525v1-abstract-short" style="display: inline;"> Understanding speech production both visually and kinematically can inform second language learning system designs, as well as the creation of speaking characters in video games and animations. In this work, we introduce a data-driven method to visually represent articulator motion in Magnetic Resonance Imaging (MRI) videos of the human vocal tract during speech based on arbitrary audio or speech… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.15525v1-abstract-full').style.display = 'inline'; document.getElementById('2409.15525v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.15525v1-abstract-full" style="display: none;"> Understanding speech production both visually and kinematically can inform second language learning system designs, as well as the creation of speaking characters in video games and animations. In this work, we introduce a data-driven method to visually represent articulator motion in Magnetic Resonance Imaging (MRI) videos of the human vocal tract during speech based on arbitrary audio or speech input. 
We leverage large pre-trained speech models, which are embedded with prior knowledge, to generalize the visual domain to unseen data using a speech-to-video diffusion model. Our findings demonstrate that the visual generation significantly benefits from the pre-trained speech representations. We also observed that evaluating phonemes in isolation is challenging but becomes more straightforward when assessed within the context of spoken words. Limitations of the current results include the presence of unsmooth tongue motion and video distortion when the tongue contacts the palate. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.15525v1-abstract-full').style.display = 'none'; document.getElementById('2409.15525v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">4 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.14945">arXiv:2409.14945</a> <span> [<a href="https://arxiv.org/pdf/2409.14945">pdf</a>, <a href="https://arxiv.org/format/2409.14945">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3624918.3625323">10.1145/3624918.3625323 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Adaptive Learning on User Segmentation: Universal to Specific Representation via Bipartite Neural Interaction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tan%2C+X">Xiaoyu Tan</a>, <a href="/search/cs?searchtype=author&query=Deng%2C+Y">Yongxin Deng</a>, <a href="/search/cs?searchtype=author&query=Qu%2C+C">Chao Qu</a>, <a href="/search/cs?searchtype=author&query=Xue%2C+S">Siqiao Xue</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xiaoming Shi</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">James Zhang</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+X">Xihe Qiu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.14945v1-abstract-short" style="display: inline;"> Recently, models 
for user representation learning have been widely applied in click-through-rate (CTR) and conversion-rate (CVR) prediction. Usually, the model learns a universal user representation as the input for subsequent scenario-specific models. However, in numerous industrial applications (e.g., recommendation and marketing), the business always operates such applications as various online… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.14945v1-abstract-full').style.display = 'inline'; document.getElementById('2409.14945v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.14945v1-abstract-full" style="display: none;"> Recently, models for user representation learning have been widely applied in click-through-rate (CTR) and conversion-rate (CVR) prediction. Usually, the model learns a universal user representation as the input for subsequent scenario-specific models. However, in numerous industrial applications (e.g., recommendation and marketing), the business always operates such applications as various online activities among different user segmentation. These segmentation are always created by domain experts. Due to the difference in user distribution (i.e., user segmentation) and business objectives in subsequent tasks, learning solely on universal representation may lead to detrimental effects on both model performance and robustness. In this paper, we propose a novel learning framework that can first learn general universal user representation through information bottleneck. Then, merge and learn a segmentation-specific or a task-specific representation through neural interaction. We design the interactive learning process by leveraging a bipartite graph architecture to model the representation learning and merging between contextual clusters and each user segmentation. 
Our proposed method is evaluated in two open-source benchmarks, two offline business datasets, and deployed on two online marketing applications to predict users' CVR. The results demonstrate that our method can achieve superior performance and surpass the baseline methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.14945v1-abstract-full').style.display = 'none'; document.getElementById('2409.14945v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.14324">arXiv:2409.14324</a> <span> [<a href="https://arxiv.org/pdf/2409.14324">pdf</a>, <a href="https://arxiv.org/format/2409.14324">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Unveiling Narrative Reasoning Limits of Large Language Models with Trope in Movie Synopses </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Su%2C+H">Hung-Ting Su</a>, <a href="/search/cs?searchtype=author&query=Hsu%2C+Y">Ya-Ching Hsu</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+X">Xudong Lin</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xiang-Qian Shi</a>, <a href="/search/cs?searchtype=author&query=Niu%2C+Y">Yulei Niu</a>, <a 
href="/search/cs?searchtype=author&query=Hsu%2C+H">Han-Yuan Hsu</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+H">Hung-yi Lee</a>, <a href="/search/cs?searchtype=author&query=Hsu%2C+W+H">Winston H. Hsu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.14324v1-abstract-short" style="display: inline;"> Large language models (LLMs) equipped with chain-of-thoughts (CoT) prompting have shown significant multi-step reasoning capabilities in factual content like mathematics, commonsense, and logic. However, their performance in narrative reasoning, which demands greater abstraction capabilities, remains unexplored. This study utilizes tropes in movie synopses to assess the abstract reasoning abilitie… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.14324v1-abstract-full').style.display = 'inline'; document.getElementById('2409.14324v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.14324v1-abstract-full" style="display: none;"> Large language models (LLMs) equipped with chain-of-thoughts (CoT) prompting have shown significant multi-step reasoning capabilities in factual content like mathematics, commonsense, and logic. However, their performance in narrative reasoning, which demands greater abstraction capabilities, remains unexplored. This study utilizes tropes in movie synopses to assess the abstract reasoning abilities of state-of-the-art LLMs and uncovers their low performance. We introduce a trope-wise querying approach to address these challenges and boost the F1 score by 11.8 points. Moreover, while prior studies suggest that CoT enhances multi-step reasoning, this study shows CoT can cause hallucinations in narrative content, reducing GPT-4's performance. 
We also introduce an Adversarial Injection method to embed trope-related text tokens into movie synopses without explicit tropes, revealing CoT's heightened sensitivity to such injections. Our comprehensive analysis provides insights for future research directions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.14324v1-abstract-full').style.display = 'none'; document.getElementById('2409.14324v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">EMNLP 2024 Findings. The first two authors contributed equally. Code: https://github.com/Shelley1214/Trope</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.13095">arXiv:2409.13095</a> <span> [<a href="https://arxiv.org/pdf/2409.13095">pdf</a>, <a href="https://arxiv.org/format/2409.13095">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Personalized Speech Recognition for Children with Test-Time Adaptation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shi%2C+Z">Zhonghao 
Shi</a>, <a href="/search/cs?searchtype=author&query=Srivastava%2C+H">Harshvardhan Srivastava</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xuan Shi</a>, <a href="/search/cs?searchtype=author&query=Narayanan%2C+S">Shrikanth Narayanan</a>, <a href="/search/cs?searchtype=author&query=Matari%C4%87%2C+M+J">Maja J. Matarić</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.13095v1-abstract-short" style="display: inline;"> Accurate automatic speech recognition (ASR) for children is crucial for effective real-time child-AI interaction, especially in educational applications. However, off-the-shelf ASR models primarily pre-trained on adult data tend to generalize poorly to children's speech due to the data domain shift from adults to children. Recent studies have found that supervised fine-tuning on children's speech… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.13095v1-abstract-full').style.display = 'inline'; document.getElementById('2409.13095v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.13095v1-abstract-full" style="display: none;"> Accurate automatic speech recognition (ASR) for children is crucial for effective real-time child-AI interaction, especially in educational applications. However, off-the-shelf ASR models primarily pre-trained on adult data tend to generalize poorly to children's speech due to the data domain shift from adults to children. Recent studies have found that supervised fine-tuning on children's speech data can help bridge this domain shift, but human annotations may be impractical to obtain for real-world applications and adaptation at training time can overlook additional domain shifts occurring at test time. 
We devised a novel ASR pipeline to apply unsupervised test-time adaptation (TTA) methods for child speech recognition, so that ASR models pre-trained on adult speech can be continuously adapted to each child speaker at test time without further human annotations. Our results show that ASR models adapted with TTA methods significantly outperform the unadapted off-the-shelf ASR baselines both on average and statistically across individual child speakers. Our analysis also discovered significant data domain shifts both between child speakers and within each child speaker, which further motivates the need for test-time adaptation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.13095v1-abstract-full').style.display = 'none'; document.getElementById('2409.13095v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This work has been submitted to the IEEE for possible publication</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.09583">arXiv:2409.09583</a> <span> [<a href="https://arxiv.org/pdf/2409.09583">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Materials Science">cond-mat.mtrl-sci</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Machine learning assisted screening of metal binary alloys for anode materials </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xingyue Shi</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+L">Linming Zhou</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yuhui Huang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yongjun Wu</a>, <a href="/search/cs?searchtype=author&query=Hong%2C+Z">Zijian Hong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.09583v1-abstract-short" style="display: inline;"> In the dynamic and rapidly advancing battery field, alloy anode materials are a focal point due to their superior electrochemical performance. Traditional screening methods are inefficient and time-consuming. Our research introduces a machine learning-assisted strategy to expedite the discovery and optimization of these materials. 
We compiled a vast dataset from the MP and AFLOW databases, encompa… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.09583v1-abstract-full').style.display = 'inline'; document.getElementById('2409.09583v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.09583v1-abstract-full" style="display: none;"> In the dynamic and rapidly advancing battery field, alloy anode materials are a focal point due to their superior electrochemical performance. Traditional screening methods are inefficient and time-consuming. Our research introduces a machine learning-assisted strategy to expedite the discovery and optimization of these materials. We compiled a vast dataset from the MP and AFLOW databases, encompassing tens of thousands of alloy compositions and properties. Utilizing a CGCNN, we accurately predicted the potential and specific capacity of alloy anodes, validated against experimental data. This approach identified approximately 120 low potential and high specific capacity alloy anodes suitable for various battery systems including Li, Na, K, Zn, Mg, Ca, and Al-based. Our method not only streamlines the screening of battery anode materials but also propels the advancement of battery material research and innovation in energy storage technology. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.09583v1-abstract-full').style.display = 'none'; document.getElementById('2409.09583v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">41 pages include SI, 5 figures in main</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.09340">arXiv:2409.09340</a> <span> [<a href="https://arxiv.org/pdf/2409.09340">pdf</a>, <a href="https://arxiv.org/format/2409.09340">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Egocentric Speaker Classification in Child-Adult Dyadic Interactions: From Sensing to Computational Modeling </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Feng%2C+T">Tiantian Feng</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+A">Anfeng Xu</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xuan Shi</a>, <a href="/search/cs?searchtype=author&query=Bishop%2C+S">Somer Bishop</a>, <a href="/search/cs?searchtype=author&query=Narayanan%2C+S">Shrikanth Narayanan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.09340v1-abstract-short" style="display: inline;"> Autism spectrum disorder (ASD) is a neurodevelopmental condition characterized by challenges in social communication, repetitive behavior, and sensory processing. One important research area in ASD is evaluating children's behavioral changes over time during treatment. 
The standard protocol with this objective is BOSCC, which involves dyadic interactions between a child and clinicians performing a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.09340v1-abstract-full').style.display = 'inline'; document.getElementById('2409.09340v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.09340v1-abstract-full" style="display: none;"> Autism spectrum disorder (ASD) is a neurodevelopmental condition characterized by challenges in social communication, repetitive behavior, and sensory processing. One important research area in ASD is evaluating children's behavioral changes over time during treatment. The standard protocol with this objective is BOSCC, which involves dyadic interactions between a child and clinicians performing a pre-defined set of activities. A fundamental aspect of understanding children's behavior in these interactions is automatic speech understanding, particularly identifying who speaks and when. Conventional approaches in this area heavily rely on speech samples recorded from a spectator perspective, and there is limited research on egocentric speech modeling. In this study, we design an experiment to perform speech sampling in BOSCC interviews from an egocentric perspective using wearable sensors and explore pre-training Ego4D speech samples to enhance child-adult speaker classification in dyadic interactions. Our findings highlight the potential of egocentric speech collection and pre-training to improve speaker classification accuracy. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.09340v1-abstract-full').style.display = 'none'; document.getElementById('2409.09340v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">pre-print under review</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.06381">arXiv:2409.06381</a> <span> [<a href="https://arxiv.org/pdf/2409.06381">pdf</a>, <a href="https://arxiv.org/format/2409.06381">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> A Cross-Font Image Retrieval Network for Recognizing Undeciphered Oracle Bone Inscriptions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wu%2C+Z">Zhicong Wu</a>, <a href="/search/cs?searchtype=author&query=Su%2C+Q">Qifeng Su</a>, <a href="/search/cs?searchtype=author&query=Gu%2C+K">Ke Gu</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xiaodong Shi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.06381v1-abstract-short" style="display: inline;"> Oracle Bone Inscription (OBI) is the earliest mature writing system known in China to date, which represents a crucial stage in the development of hieroglyphs. 
Nevertheless, the substantial quantity of undeciphered OBI characters continues to pose a persistent challenge for scholars, while conventional methods of ancient script research are both time-consuming and labor-intensive. In this paper, w… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.06381v1-abstract-full').style.display = 'inline'; document.getElementById('2409.06381v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.06381v1-abstract-full" style="display: none;"> Oracle Bone Inscription (OBI) is the earliest mature writing system known in China to date, which represents a crucial stage in the development of hieroglyphs. Nevertheless, the substantial quantity of undeciphered OBI characters continues to pose a persistent challenge for scholars, while conventional methods of ancient script research are both time-consuming and labor-intensive. In this paper, we propose a cross-font image retrieval network (CFIRN) to decipher OBI characters by establishing associations between OBI characters and other script forms, simulating the interpretive behavior of paleography scholars. Concretely, our network employs a siamese framework to extract deep features from character images of various fonts, fully exploring structure clues with different resolution by designed multiscale feature integration (MFI) module and multiscale refinement classifier (MRC). Extensive experiments on three challenging cross-font image retrieval datasets demonstrate that, given undeciphered OBI characters, our CFIRN can effectively achieve accurate matches with characters from other gallery fonts. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.06381v1-abstract-full').style.display = 'none'; document.getElementById('2409.06381v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.05331">arXiv:2409.05331</a> <span> [<a href="https://arxiv.org/pdf/2409.05331">pdf</a>, <a href="https://arxiv.org/format/2409.05331">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> Towards Practical Overlay Networks for Decentralized Federated Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hua%2C+Y">Yifan Hua</a>, <a href="/search/cs?searchtype=author&query=Pang%2C+J">Jinlong Pang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xiaoxue Zhang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yi Liu</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xiaofeng Shi</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+B">Bao Wang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yang Liu</a>, <a href="/search/cs?searchtype=author&query=Qian%2C+C">Chen Qian</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.05331v1-abstract-short" 
style="display: inline;"> Decentralized federated learning (DFL) uses peer-to-peer communication to avoid the single point of failure problem in federated learning and has been considered an attractive solution for machine learning tasks on distributed devices. We provide the first solution to a fundamental network problem of DFL: what overlay network should DFL use to achieve fast training of highly accurate models, low c… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.05331v1-abstract-full').style.display = 'inline'; document.getElementById('2409.05331v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.05331v1-abstract-full" style="display: none;"> Decentralized federated learning (DFL) uses peer-to-peer communication to avoid the single point of failure problem in federated learning and has been considered an attractive solution for machine learning tasks on distributed devices. We provide the first solution to a fundamental network problem of DFL: what overlay network should DFL use to achieve fast training of highly accurate models, low communication, and decentralized construction and maintenance? Overlay topologies of DFL have been investigated, but no existing DFL topology includes decentralized protocols for network construction and topology maintenance. Without these protocols, DFL cannot run in practice. This work presents an overlay network, called FedLay, which provides fast training and low communication cost for practical DFL. FedLay is the first solution for constructing near-random regular topologies in a decentralized manner and maintaining the topologies under node joins and failures. 
Experiments based on prototype implementation and simulations show that FedLay achieves the fastest model convergence and highest accuracy on real datasets compared to existing DFL solutions while incurring small communication costs and being resilient to node joins and failures. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.05331v1-abstract-full').style.display = 'none'; document.getElementById('2409.05331v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.11856">arXiv:2408.11856</a> <span> [<a href="https://arxiv.org/pdf/2408.11856">pdf</a>, <a href="https://arxiv.org/format/2408.11856">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Dynamic Adaptive Optimization for Effective Sentiment Analysis Fine-Tuning on Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ding%2C+H">Hongcheng Ding</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+X">Xuanze Zhao</a>, <a href="/search/cs?searchtype=author&query=Abdullah%2C+S+N">Shamsul Nahar Abdullah</a>, <a href="/search/cs?searchtype=author&query=Dewi%2C+D+A">Deshinta Arrova Dewi</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Z">Zixiao Jiang</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xiangyu Shi</a> </p> <p class="abstract 
mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.11856v2-abstract-short" style="display: inline;"> Sentiment analysis plays a crucial role in various domains, such as business intelligence and financial forecasting. Large language models (LLMs) have become a popular paradigm for sentiment analysis, leveraging multi-task learning to address specific tasks concurrently. However, LLMs with fine-tuning for sentiment analysis often underperform due to the inherent challenges in managing diverse tas… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.11856v2-abstract-full').style.display = 'inline'; document.getElementById('2408.11856v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.11856v2-abstract-full" style="display: none;"> Sentiment analysis plays a crucial role in various domains, such as business intelligence and financial forecasting. Large language models (LLMs) have become a popular paradigm for sentiment analysis, leveraging multi-task learning to address specific tasks concurrently. However, LLMs with fine-tuning for sentiment analysis often underperform due to the inherent challenges in managing diverse task complexities. Moreover, constant-weight approaches in multi-task learning struggle to adapt to variations in data characteristics, further complicating model effectiveness. To address these issues, we propose a novel multi-task learning framework with a dynamic adaptive optimization (DAO) module. This module is designed as a plug-and-play component that can be seamlessly integrated into existing models, providing an effective and flexible solution for multi-task learning. 
The key component of the DAO module is dynamic adaptive loss, which dynamically adjusts the weights assigned to different tasks based on their relative importance and data characteristics during training. Sentiment analyses on a standard and customized financial text dataset demonstrate that the proposed framework achieves superior performance. Specifically, this work improves the Mean Squared Error (MSE) and Accuracy (ACC) by 15.58% and 1.24% respectively, compared with previous work. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.11856v2-abstract-full').style.display = 'none'; document.getElementById('2408.11856v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.08034">arXiv:2408.08034</a> <span> [<a href="https://arxiv.org/pdf/2408.08034">pdf</a>, <a href="https://arxiv.org/format/2408.08034">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> Centralized Network Utility Maximization with Accelerated Gradient Method </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tian%2C+Y">Ying Tian</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zhiliang Wang</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+X">Xia Yin</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xingang Shi</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+J">Jiahai Yang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Han Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.08034v2-abstract-short" style="display: inline;"> Network utility maximization (NUM) is a well-studied problem for network traffic management and resource allocation. Because of the inherent decentralization and complexity of networks, most researches develop decentralized NUM algorithms. 
In recent years, the Software Defined Networking (SDN) architecture has been widely used, especially in cloud networks and inter-datacenter networks managed by… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.08034v2-abstract-full').style.display = 'inline'; document.getElementById('2408.08034v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.08034v2-abstract-full" style="display: none;"> Network utility maximization (NUM) is a well-studied problem for network traffic management and resource allocation. Because of the inherent decentralization and complexity of networks, most researches develop decentralized NUM algorithms. In recent years, the Software Defined Networking (SDN) architecture has been widely used, especially in cloud networks and inter-datacenter networks managed by large enterprises, promoting the design of centralized NUM algorithms. To cope with the large and increasing number of flows in such SDN networks, existing researches about centralized NUM focus on the scalability of the algorithm with respect to the number of flows, however the efficiency is ignored. In this paper, we focus on the SDN scenario, and derive a centralized, efficient and scalable algorithm for the NUM problem. By designing a smooth utility function and a smooth penalty function, we formulate the NUM problem with a smooth objective function, which enables the use of Nesterov's accelerated gradient method. We prove that the proposed method has $O(d/t^2)$ convergence rate, which is the fastest with respect to the number of iterations $t$, and our method is scalable with respect to the number of flows $d$ in the network. Experiments show that our method obtains accurate solutions with fewer iterations, and achieves close-to-optimal network utility. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.08034v2-abstract-full').style.display = 'none'; document.getElementById('2408.08034v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> 2022 IEEE 30th International Conference on Network Protocols (ICNP), pp. 1-11 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.07709">arXiv:2408.07709</a> <span> [<a href="https://arxiv.org/pdf/2408.07709">pdf</a>, <a href="https://arxiv.org/format/2408.07709">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Pretrained-Guided Conditional Diffusion Models for Microbiome Data Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xinyuan Shi</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+F">Fangfang Zhu</a>, <a href="/search/cs?searchtype=author&query=Min%2C+W">Wenwen Min</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.07709v1-abstract-short" style="display: inline;"> Emerging evidence indicates that human cancers are intricately linked to human microbiomes, forming 
an inseparable connection. However, due to limited sample sizes and significant data loss during collection for various reasons, some machine learning methods have been proposed to address the issue of missing data. These methods have not fully utilized the known clinical information of patients to… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.07709v1-abstract-full').style.display = 'inline'; document.getElementById('2408.07709v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.07709v1-abstract-full" style="display: none;"> Emerging evidence indicates that human cancers are intricately linked to human microbiomes, forming an inseparable connection. However, due to limited sample sizes and significant data loss during collection for various reasons, some machine learning methods have been proposed to address the issue of missing data. These methods have not fully utilized the known clinical information of patients to enhance the accuracy of data imputation. Therefore, we introduce mbVDiT, a novel pre-trained conditional diffusion model for microbiome data imputation and denoising, which uses the unmasked data and patient metadata as conditional guidance for imputing missing values. It also uses VAE to integrate the other public microbiome datasets to enhance model performance. The results on the microbiome datasets from three different cancer types demonstrate the performance of our methods in comparison with existing methods. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.07709v1-abstract-full').style.display = 'none'; document.getElementById('2408.07709v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.03944">arXiv:2408.03944</a> <span> [<a href="https://arxiv.org/pdf/2408.03944">pdf</a>, <a href="https://arxiv.org/format/2408.03944">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Improving Fast Adversarial Training Paradigm: An Example Taxonomy Perspective </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gui%2C+J">Jie Gui</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+C">Chengze Jiang</a>, <a href="/search/cs?searchtype=author&query=Dong%2C+M">Minjing Dong</a>, <a href="/search/cs?searchtype=author&query=Tong%2C+K">Kun Tong</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xinli Shi</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+Y+Y">Yuan Yan Tang</a>, <a href="/search/cs?searchtype=author&query=Tao%2C+D">Dacheng Tao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.03944v2-abstract-short" style="display: inline;"> While adversarial training is an effective defense method against 
adversarial attacks, it notably increases the training cost. To this end, fast adversarial training (FAT) is presented for efficient training and has become a hot research topic. However, FAT suffers from catastrophic overfitting, which leads to a performance drop compared with multi-step adversarial training. However, the cause of… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.03944v2-abstract-full').style.display = 'inline'; document.getElementById('2408.03944v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.03944v2-abstract-full" style="display: none;"> While adversarial training is an effective defense method against adversarial attacks, it notably increases the training cost. To this end, fast adversarial training (FAT) is presented for efficient training and has become a hot research topic. However, FAT suffers from catastrophic overfitting, which leads to a performance drop compared with multi-step adversarial training. However, the cause of catastrophic overfitting remains unclear and lacks exploration. In this paper, we present an example taxonomy in FAT, which identifies that catastrophic overfitting is caused by the imbalance between the inner and outer optimization in FAT. Furthermore, we investigated the impact of varying degrees of training loss, revealing a correlation between training loss and catastrophic overfitting. Based on these observations, we redesign the loss function in FAT with the proposed dynamic label relaxation to concentrate the loss range and reduce the impact of misclassified examples. Meanwhile, we introduce batch momentum initialization to enhance the diversity to prevent catastrophic overfitting in an efficient manner. Furthermore, we also propose Catastrophic Overfitting aware Loss Adaptation (COLA), which employs a separate training strategy for examples based on their loss degree. 
Our proposed method, named example taxonomy aware FAT (ETA), establishes an improved paradigm for FAT. Experiment results demonstrate our ETA achieves state-of-the-art performance. Comprehensive experiments on four standard datasets demonstrate the competitiveness of our proposed method. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.03944v2-abstract-full').style.display = 'none'; document.getElementById('2408.03944v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">15 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.00601">arXiv:2408.00601</a> <span> [<a href="https://arxiv.org/pdf/2408.00601">pdf</a>, <a href="https://arxiv.org/format/2408.00601">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> AutoPV: Automatically Design Your Photovoltaic Power Forecasting Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+D">Dayin Chen</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xiaodan Shi</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+M">Mingkun Jiang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Haoran Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+D">Dongxiao Zhang</a>, <a 
href="/search/cs?searchtype=author&query=Chen%2C+Y">Yuntian Chen</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+J">Jinyue Yan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.00601v1-abstract-short" style="display: inline;"> Photovoltaic power forecasting (PVPF) is a critical area in time series forecasting (TSF), enabling the efficient utilization of solar energy. With advancements in machine learning and deep learning, various models have been applied to PVPF tasks. However, constructing an optimal predictive architecture for specific PVPF tasks remains challenging, as it requires cross-domain knowledge and signific… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.00601v1-abstract-full').style.display = 'inline'; document.getElementById('2408.00601v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.00601v1-abstract-full" style="display: none;"> Photovoltaic power forecasting (PVPF) is a critical area in time series forecasting (TSF), enabling the efficient utilization of solar energy. With advancements in machine learning and deep learning, various models have been applied to PVPF tasks. However, constructing an optimal predictive architecture for specific PVPF tasks remains challenging, as it requires cross-domain knowledge and significant labor costs. To address this challenge, we introduce AutoPV, a novel framework for the automated search and construction of PVPF models based on neural architecture search (NAS) technology. We develop a brand new NAS search space that incorporates various data processing techniques from state-of-the-art (SOTA) TSF models and typical PVPF deep learning models. 
The effectiveness of AutoPV is evaluated on diverse PVPF tasks using a dataset from the Daqing Photovoltaic Station in China. Experimental results demonstrate that AutoPV can complete the predictive architecture construction process in a relatively short time, and the newly constructed architecture is superior to SOTA predefined models. This work bridges the gap in applying NAS to TSF problems, assisting non-experts and industries in automatically designing effective PVPF models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.00601v1-abstract-full').style.display = 'none'; document.getElementById('2408.00601v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.21566">arXiv:2407.21566</a> <span> [<a href="https://arxiv.org/pdf/2407.21566">pdf</a>, <a href="https://arxiv.org/format/2407.21566">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> TRGR: Transmissive RIS-aided Gait Recognition Through Walls </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yunlong Huang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Junshuo Liu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jianan Zhang</a>, <a href="/search/cs?searchtype=author&query=Mi%2C+T">Tiebin Mi</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xin Shi</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+R+C">Robert Caiming Qiu</a> </p> <p 
class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.21566v1-abstract-short" style="display: inline;"> Gait recognition with radio frequency (RF) signals enables many potential applications requiring accurate identification. However, current systems require individuals to be within a line-of-sight (LOS) environment and struggle with low signal-to-noise ratio (SNR) when signals traverse concrete and thick walls. To address these challenges, we present TRGR, a novel transmissive reconfigurable intell… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.21566v1-abstract-full').style.display = 'inline'; document.getElementById('2407.21566v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.21566v1-abstract-full" style="display: none;"> Gait recognition with radio frequency (RF) signals enables many potential applications requiring accurate identification. However, current systems require individuals to be within a line-of-sight (LOS) environment and struggle with low signal-to-noise ratio (SNR) when signals traverse concrete and thick walls. To address these challenges, we present TRGR, a novel transmissive reconfigurable intelligent surface (RIS)-aided gait recognition system. TRGR can recognize human identities through walls using only the magnitude measurements of channel state information (CSI) from a pair of transceivers. Specifically, by leveraging transmissive RIS alongside a configuration alternating optimization algorithm, TRGR enhances wall penetration and signal quality, enabling accurate gait recognition. Furthermore, a residual convolution network (RCNN) is proposed as the backbone network to learn robust human information. 
Experimental results confirm the efficacy of transmissive RIS, highlighting the significant potential of transmissive RIS in enhancing RF-based gait recognition systems. Extensive experiment results show that TRGR achieves an average accuracy of 97.88\% in identifying persons when signals traverse concrete walls, demonstrating the effectiveness and robustness of TRGR. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.21566v1-abstract-full').style.display = 'none'; document.getElementById('2407.21566v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Globecom 2024 IoTSN accepted</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.21045">arXiv:2407.21045</a> <span> [<a href="https://arxiv.org/pdf/2407.21045">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Unlocking the Potential: Benchmarking Large Language Models in Water Engineering and Research </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xu%2C+B">Boyan Xu</a>, <a href="/search/cs?searchtype=author&query=Wen%2C+L">Liang Wen</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zihao Li</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Y">Yuxing Yang</a>, <a 
href="/search/cs?searchtype=author&query=Wu%2C+G">Guanlan Wu</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+X">Xiongpeng Tang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yu Li</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z">Zihao Wu</a>, <a href="/search/cs?searchtype=author&query=Su%2C+Q">Qingxian Su</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xueqing Shi</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Y">Yue Yang</a>, <a href="/search/cs?searchtype=author&query=Tong%2C+R">Rui Tong</a>, <a href="/search/cs?searchtype=author&query=Ng%2C+H+Y">How Yong Ng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.21045v1-abstract-short" style="display: inline;"> Recent advancements in Large Language Models (LLMs) have sparked interest in their potential applications across various fields. This paper embarked on a pivotal inquiry: Can existing LLMs effectively serve as "water expert models" for water engineering and research tasks? This study was the first to evaluate LLMs' contributions across various water engineering and research tasks by establishing a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.21045v1-abstract-full').style.display = 'inline'; document.getElementById('2407.21045v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.21045v1-abstract-full" style="display: none;"> Recent advancements in Large Language Models (LLMs) have sparked interest in their potential applications across various fields. This paper embarked on a pivotal inquiry: Can existing LLMs effectively serve as "water expert models" for water engineering and research tasks? 
This study was the first to evaluate LLMs' contributions across various water engineering and research tasks by establishing a domain-specific benchmark suite, namely, WaterER. Herein, we prepared 983 tasks related to water engineering and research, categorized into "wastewater treatment", "environmental restoration", "drinking water treatment and distribution", "sanitation", "anaerobic digestion" and "contaminants assessment". We evaluated the performance of seven LLMs (i.e., GPT-4, GPT-3.5, Gemini, GLM-4, ERNIE, QWEN and Llama3) on these tasks. We highlighted the strengths of GPT-4 in handling diverse and complex tasks of water engineering and water research, the specialized capabilities of Gemini in academic contexts, Llama3's strongest capacity to answer Chinese water engineering questions and the competitive performance of Chinese-oriented models like GLM-4, ERNIE and QWEN in some water engineering tasks. More specifically, current LLMs excelled particularly in generating precise research gaps for papers on "contaminants and related water quality monitoring and assessment". Additionally, they were more adept at creating appropriate titles for research papers on "treatment processes for wastewaters", "environmental restoration", and "drinking water treatment". Overall, this study pioneered evaluating LLMs in water engineering and research by introducing the WaterER benchmark to assess the trustworthiness of their predictions. This standardized evaluation framework would also drive future advancements in LLM technology by using targeting datasets, propelling these models towards becoming true "water expert". 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.21045v1-abstract-full').style.display = 'none'; document.getElementById('2407.21045v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.20906">arXiv:2407.20906</a> <span> [<a href="https://arxiv.org/pdf/2407.20906">pdf</a>, <a href="https://arxiv.org/format/2407.20906">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Data Analysis, Statistics and Probability">physics.data-an</span> </div> </div> <p class="title is-5 mathjax"> Automated Review Generation Method Based on Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wu%2C+S">Shican Wu</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+X">Xiao Ma</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+D">Dehui Luo</a>, <a href="/search/cs?searchtype=author&query=Li%2C+L">Lulu Li</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xiangcheng Shi</a>, <a href="/search/cs?searchtype=author&query=Chang%2C+X">Xin Chang</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+X">Xiaoyun Lin</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+R">Ran Luo</a>, <a href="/search/cs?searchtype=author&query=Pei%2C+C">Chunlei Pei</a>, <a 
href="/search/cs?searchtype=author&query=Zhao%2C+Z">Zhi-Jian Zhao</a>, <a href="/search/cs?searchtype=author&query=Gong%2C+J">Jinlong Gong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.20906v1-abstract-short" style="display: inline;"> Literature research, vital for scientific advancement, is overwhelmed by the vast ocean of available information. Addressing this, we propose an automated review generation method based on Large Language Models (LLMs) to streamline literature processing and reduce cognitive load. In case study on propane dehydrogenation (PDH) catalysts, our method swiftly generated comprehensive reviews from 343 a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.20906v1-abstract-full').style.display = 'inline'; document.getElementById('2407.20906v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.20906v1-abstract-full" style="display: none;"> Literature research, vital for scientific advancement, is overwhelmed by the vast ocean of available information. Addressing this, we propose an automated review generation method based on Large Language Models (LLMs) to streamline literature processing and reduce cognitive load. In case study on propane dehydrogenation (PDH) catalysts, our method swiftly generated comprehensive reviews from 343 articles, averaging seconds per article per LLM account. Extended analysis of 1041 articles provided deep insights into catalysts' composition, structure, and performance. Recognizing LLMs' hallucinations, we employed a multi-layered quality control strategy, ensuring our method's reliability and effective hallucination mitigation. 
Expert verification confirms the accuracy and citation integrity of generated reviews, demonstrating LLM hallucination risks reduced to below 0.5% with over 95% confidence. Released Windows application enables one-click review generation, aiding researchers in tracking advancements and recommending literature. This approach showcases LLMs' role in enhancing scientific research productivity and sets the stage for further exploration. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.20906v1-abstract-full').style.display = 'none'; document.getElementById('2407.20906v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages, 3 figures, 3 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.18626">arXiv:2407.18626</a> <span> [<a href="https://arxiv.org/pdf/2407.18626">pdf</a>, <a href="https://arxiv.org/format/2407.18626">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Digital Libraries">cs.DL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> </div> </div> <p class="title is-5 
mathjax"> Every Part Matters: Integrity Verification of Scientific Figures Based on Multimodal Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xiang Shi</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jiawei Liu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yinpeng Liu</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+Q">Qikai Cheng</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+W">Wei Lu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.18626v1-abstract-short" style="display: inline;"> This paper tackles a key issue in the interpretation of scientific figures: the fine-grained alignment of text and figures. It advances beyond prior research that primarily dealt with straightforward, data-driven visualizations such as bar and pie charts and only offered a basic understanding of diagrams through captioning and classification. We introduce a novel task, Figure Integrity Verificatio… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.18626v1-abstract-full').style.display = 'inline'; document.getElementById('2407.18626v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.18626v1-abstract-full" style="display: none;"> This paper tackles a key issue in the interpretation of scientific figures: the fine-grained alignment of text and figures. It advances beyond prior research that primarily dealt with straightforward, data-driven visualizations such as bar and pie charts and only offered a basic understanding of diagrams through captioning and classification. 
We introduce a novel task, Figure Integrity Verification, designed to evaluate the precision of technologies in aligning textual knowledge with visual elements in scientific figures. To support this, we develop a semi-automated method for constructing a large-scale dataset, Figure-seg, specifically designed for this task. Additionally, we propose an innovative framework, Every Part Matters (EPM), which leverages Multimodal Large Language Models (MLLMs) to not only incrementally improve the alignment and verification of text-figure integrity but also enhance integrity through analogical reasoning. Our comprehensive experiments show that these innovations substantially improve upon existing methods, allowing for more precise and thorough analysis of complex scientific figures. This progress not only enhances our understanding of multimodal technologies but also stimulates further research and practical applications across fields requiring the accurate interpretation of complex visual data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.18626v1-abstract-full').style.display = 'none'; document.getElementById('2407.18626v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">28 pages, 11 figures, under review</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.17734">arXiv:2407.17734</a> <span> [<a href="https://arxiv.org/pdf/2407.17734">pdf</a>, <a href="https://arxiv.org/format/2407.17734">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Cost-effective Instruction Learning for Pathology Vision and Language Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+K">Kaitao Chen</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+M">Mianxin Liu</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+F">Fang Yan</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+L">Lei Ma</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xiaoming Shi</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+L">Lilong Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xiaosong Wang</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+L">Lifeng Zhu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zhe Wang</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+M">Mu Zhou</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Shaoting Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark 
mathjax" id="2407.17734v1-abstract-short" style="display: inline;"> The advent of vision-language models fosters the interactive conversations between AI-enabled models and humans. Yet applying these models into clinics must deal with daunting challenges around large-scale training data, financial, and computational resources. Here we propose a cost-effective instruction learning framework for conversational pathology named as CLOVER. CLOVER only trains a lightwei… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.17734v1-abstract-full').style.display = 'inline'; document.getElementById('2407.17734v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.17734v1-abstract-full" style="display: none;"> The advent of vision-language models fosters the interactive conversations between AI-enabled models and humans. Yet applying these models into clinics must deal with daunting challenges around large-scale training data, financial, and computational resources. Here we propose a cost-effective instruction learning framework for conversational pathology named as CLOVER. CLOVER only trains a lightweight module and uses instruction tuning while freezing the parameters of the large language model. Instead of using costly GPT-4, we propose well-designed prompts on GPT-3.5 for building generation-based instructions, emphasizing the utility of pathological knowledge derived from the Internet source. To augment the use of instructions, we construct a high-quality set of template-based instructions in the context of digital pathology. From two benchmark datasets, our findings reveal the strength of hybrid-form instructions in the visual question-answer in pathology. 
Extensive results show the cost-effectiveness of CLOVER in answering both open-ended and closed-ended questions, where CLOVER outperforms strong baselines that possess 37 times more training parameters and use instruction data generated from GPT-4. Through the instruction tuning, CLOVER exhibits robustness of few-shot learning in the external clinical dataset. These findings demonstrate that cost-effective modeling of CLOVER could accelerate the adoption of rapid conversational applications in the landscape of digital pathology. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.17734v1-abstract-full').style.display = 'none'; document.getElementById('2407.17734v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.16600">arXiv:2407.16600</a> <span> [<a href="https://arxiv.org/pdf/2407.16600">pdf</a>, <a href="https://arxiv.org/format/2407.16600">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> DHGS: Decoupled Hybrid Gaussian Splatting for Driving Scene </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xi Shi</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+L">Lingli Chen</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+P">Peng Wei</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+X">Xi Wu</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+T">Tian Jiang</a>, <a 
href="/search/cs?searchtype=author&query=Luo%2C+Y">Yonggang Luo</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+L">Lecheng Xie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.16600v3-abstract-short" style="display: inline;"> Existing Gaussian splatting methods often fall short in achieving satisfactory novel view synthesis in driving scenes, primarily due to the absence of crafty designs and geometric constraints for the involved elements. This paper introduces a novel neural rendering method termed Decoupled Hybrid Gaussian Splatting (DHGS), targeting at promoting the rendering quality of novel view synthesis for sta… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.16600v3-abstract-full').style.display = 'inline'; document.getElementById('2407.16600v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.16600v3-abstract-full" style="display: none;"> Existing Gaussian splatting methods often fall short in achieving satisfactory novel view synthesis in driving scenes, primarily due to the absence of crafty designs and geometric constraints for the involved elements. This paper introduces a novel neural rendering method termed Decoupled Hybrid Gaussian Splatting (DHGS), targeting at promoting the rendering quality of novel view synthesis for static driving scenes. The novelty of this work lies in the decoupled and hybrid pixel-level blender for road and non-road layers, without the conventional unified differentiable rendering logic for the entire scene. Still, consistency and continuity in superimposition are preserved through the proposed depth-ordered hybrid rendering strategy. 
Additionally, an implicit road representation comprised of a Signed Distance Function (SDF) is trained to supervise the road surface with subtle geometric attributes. Accompanied by the use of auxiliary transmittance loss and consistency loss, novel images with imperceptible boundary and elevated fidelity are ultimately obtained. Substantial experiments on the Waymo dataset prove that DHGS outperforms the state-of-the-art methods. The project page where more video evidences are given is: https://ironbrotherstyle.github.io/dhgs_web. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.16600v3-abstract-full').style.display = 'none'; document.getElementById('2407.16600v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages, 14 figures, conference</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.13964">arXiv:2407.13964</a> <span> [<a href="https://arxiv.org/pdf/2407.13964">pdf</a>, <a href="https://arxiv.org/format/2407.13964">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> </div> </div> <p class="title is-5 mathjax"> Persuading while Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Arieli%2C+I">Itai Arieli</a>, <a href="/search/cs?searchtype=author&query=Babichenko%2C+Y">Yakov Babichenko</a>, <a href="/search/cs?searchtype=author&query=Shaiderman%2C+D">Dimitry Shaiderman</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xianwen Shi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.13964v1-abstract-short" style="display: inline;"> We propose a dynamic product adoption persuasion model involving an impatient partially informed sender who gradually learns the state. In this model, the sender gathers information over time, and hence her posteriors' sequence forms a discrete-time martingale. The sender commits to a dynamic revelation policy to persuade the agent to adopt a product. 
We demonstrate that under the assumption that… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.13964v1-abstract-full').style.display = 'inline'; document.getElementById('2407.13964v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.13964v1-abstract-full" style="display: none;"> We propose a dynamic product adoption persuasion model involving an impatient partially informed sender who gradually learns the state. In this model, the sender gathers information over time, and hence her posteriors' sequence forms a discrete-time martingale. The sender commits to a dynamic revelation policy to persuade the agent to adopt a product. We demonstrate that under the assumption that the sender's martingale possesses Blackwell-preserving kernels, the family of optimal strategies for the sender takes an interval form; namely, in every period the set of martingale realizations in which adoption occurs is an interval. Utilizing this, we prove that if the sender is sufficiently impatient, then under a random walk martingale, the optimal policy is fully transparent up to the moment of adoption; namely, the sender reveals the entire information she privately holds in every period. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.13964v1-abstract-full').style.display = 'none'; document.getElementById('2407.13964v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.10990">arXiv:2407.10990</a> <span> [<a href="https://arxiv.org/pdf/2407.10990">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> MedBench: A Comprehensive, Standardized, and Reliable Benchmarking System for Evaluating Chinese Medical Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+M">Mianxin Liu</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+J">Jinru Ding</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+J">Jie Xu</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+W">Weiguo Hu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xiaoyang Li</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+L">Lifeng Zhu</a>, <a href="/search/cs?searchtype=author&query=Bai%2C+Z">Zhian Bai</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xiaoming Shi</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+B">Benyou Wang</a>, <a href="/search/cs?searchtype=author&query=Song%2C+H">Haitao Song</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+P">Pengfei Liu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xiaofan Zhang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shanshan Wang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+K">Kang Li</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Haofen Wang</a>, <a href="/search/cs?searchtype=author&query=Ruan%2C+T">Tong Ruan</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+X">Xuanjing Huang</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+X">Xin 
Sun</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Shaoting Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.10990v1-abstract-short" style="display: inline;"> Ensuring the general efficacy and goodness for human beings from medical large language models (LLM) before real-world deployment is crucial. However, a widely accepted and accessible evaluation process for medical LLM, especially in the Chinese context, remains to be established. In this work, we introduce "MedBench", a comprehensive, standardized, and reliable benchmarking system for Chinese med… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.10990v1-abstract-full').style.display = 'inline'; document.getElementById('2407.10990v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.10990v1-abstract-full" style="display: none;"> Ensuring the general efficacy and goodness for human beings from medical large language models (LLM) before real-world deployment is crucial. However, a widely accepted and accessible evaluation process for medical LLM, especially in the Chinese context, remains to be established. In this work, we introduce "MedBench", a comprehensive, standardized, and reliable benchmarking system for Chinese medical LLM. First, MedBench assembles the currently largest evaluation dataset (300,901 questions) to cover 43 clinical specialties and performs multi-facet evaluation on medical LLM. Second, MedBench provides a standardized and fully automatic cloud-based evaluation infrastructure, with physical separations for question and ground truth. Third, MedBench implements dynamic evaluation mechanisms to prevent shortcut learning and answer remembering. 
Applying MedBench to popular general and medical LLMs, we observe unbiased, reproducible evaluation results largely aligning with medical professionals' perspectives. This study establishes a significant foundation for preparing the practical applications of Chinese medical LLMs. MedBench is publicly accessible at https://medbench.opencompass.org.cn. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.10990v1-abstract-full').style.display = 'none'; document.getElementById('2407.10990v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">25 pages, 4 figures</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Shi%2C+X&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Shi%2C+X&start=0" class="pagination-link is-current" aria-label="Go to page 1" aria-current="page">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Shi%2C+X&start=50" class="pagination-link " aria-label="Page 2">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Shi%2C+X&start=100" class="pagination-link " aria-label="Page 3">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Shi%2C+X&start=150" class="pagination-link " aria-label="Page 4">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Shi%2C+X&start=200" class="pagination-link " aria-label="Page 5" 
aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">…</span></li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 
--> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 
25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>