CINXE.COM

Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;50 of 186 results for author: <span class="mathjax">Ning, X</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> <div class="content"> <form method="GET" action="/search/" aria-role="search"> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Ning, X"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Ning%2C+X&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Ning, X"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Ning%2C+X&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Ning%2C+X&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Ning%2C+X&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Ning%2C+X&amp;start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Ning%2C+X&amp;start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.14355">arXiv:2411.14355</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.14355">pdf</a>, <a href="https://arxiv.org/format/2411.14355">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Nuclear Experiment">nucl-ex</span> </div> </div> <p class="title is-5 mathjax"> Measurement of two-neutrino double electron capture half-life of $^{124}$Xe with PandaX-4T </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=PandaX+Collaboration"> PandaX Collaboration</a>, <a href="/search/?searchtype=author&amp;query=Bo%2C+Z">Zihao Bo</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+W">Wei Chen</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+X">Xun Chen</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+Y">Yunhua Chen</a>, <a href="/search/?searchtype=author&amp;query=Cheng%2C+Z">Zhaokan Cheng</a>, <a href="/search/?searchtype=author&amp;query=Cui%2C+X">Xiangyi Cui</a>, <a href="/search/?searchtype=author&amp;query=Fan%2C+Y">Yingjie Fan</a>, <a href="/search/?searchtype=author&amp;query=Fang%2C+D">Deqing Fang</a>, <a href="/search/?searchtype=author&amp;query=Gao%2C+Z">Zhixing Gao</a>, <a href="/search/?searchtype=author&amp;query=Geng%2C+L">Lisheng Geng</a>, <a href="/search/?searchtype=author&amp;query=Giboni%2C+K">Karl Giboni</a>, <a href="/search/?searchtype=author&amp;query=Guo%2C+X">Xunan Guo</a>, <a href="/search/?searchtype=author&amp;query=Guo%2C+X">Xuyuan Guo</a>, <a href="/search/?searchtype=author&amp;query=Guo%2C+Z">Zichao Guo</a>, <a href="/search/?searchtype=author&amp;query=Han%2C+C">Chencheng Han</a>, <a href="/search/?searchtype=author&amp;query=Han%2C+K">Ke Han</a>, <a href="/search/?searchtype=author&amp;query=He%2C+C">Changda He</a>, <a href="/search/?searchtype=author&amp;query=He%2C+J">Jinrong He</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+D">Di Huang</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+H">Houqi Huang</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+J">Junting Huang</a>, <a href="/search/?searchtype=author&amp;query=Hou%2C+R">Ruquan Hou</a>, <a href="/search/?searchtype=author&amp;query=Hou%2C+Y">Yu Hou</a>, <a href="/search/?searchtype=author&amp;query=Ji%2C+X">Xiangdong Ji</a> , et al. (77 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.14355v1-abstract-short" style="display: inline;"> Detailed studies of two-neutrino double electron capture (2$谓$DEC) is a crucial step towards searching for the neutrino-less mode to explore the Majorana nature of neutrinos. We have measured precisely the half-life of the 2$谓$DEC process in $^{124}$Xe, utilizing a total exposure of 1.73 tonne$\cdot$year from the commissioning run and the first science run of the PandaX-4T experiment. A time-depen&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14355v1-abstract-full').style.display = 'inline'; document.getElementById('2411.14355v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.14355v1-abstract-full" style="display: none;"> Detailed studies of two-neutrino double electron capture (2$谓$DEC) is a crucial step towards searching for the neutrino-less mode to explore the Majorana nature of neutrinos. We have measured precisely the half-life of the 2$谓$DEC process in $^{124}$Xe, utilizing a total exposure of 1.73 tonne$\cdot$year from the commissioning run and the first science run of the PandaX-4T experiment. A time-dependent background model in the $\mathcal{O}$(10 keV) energy is constructed for the first time in PandaX-4T data. With an unbinned maximum likelihood fit, we determine the half-life of the 2$谓$DEC process to be $(1.03\pm0.15_{\rm stat}\pm0.06_{\rm sys})\times 10^{22}$$\,$yr. Furthermore, we have evaluated the branching ratio for both electrons captured from the $K$ shell ($KK$) to be $(65\pm5)\%$, which aligns with the $^{124}$Xe nuclear model calculations within 1.5$\,$$蟽$. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14355v1-abstract-full').style.display = 'none'; document.getElementById('2411.14355v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">18 pages, 5 figures, 3 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.10948">arXiv:2411.10948</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.10948">pdf</a>, <a href="https://arxiv.org/format/2411.10948">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Towards Accurate and Efficient Sub-8-Bit Integer Training </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Guo%2C+W">Wenjin Guo</a>, <a href="/search/?searchtype=author&amp;query=Liu%2C+D">Donglai Liu</a>, <a href="/search/?searchtype=author&amp;query=Xie%2C+W">Weiying Xie</a>, <a href="/search/?searchtype=author&amp;query=Li%2C+Y">Yunsong Li</a>, <a href="/search/?searchtype=author&amp;query=Ning%2C+X">Xuefei Ning</a>, <a href="/search/?searchtype=author&amp;query=Meng%2C+Z">Zihan Meng</a>, <a href="/search/?searchtype=author&amp;query=Zeng%2C+S">Shulin Zeng</a>, <a href="/search/?searchtype=author&amp;query=Lei%2C+J">Jie Lei</a>, <a href="/search/?searchtype=author&amp;query=Fang%2C+Z">Zhenman Fang</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+Y">Yu Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.10948v1-abstract-short" style="display: inline;"> Neural network training is a memory- and compute-intensive task. Quantization, which enables low-bitwidth formats in training, can significantly mitigate the workload. To reduce quantization error, recent methods have developed new data formats and additional pre-processing operations on quantizers. However, it remains quite challenging to achieve high accuracy and efficiency simultaneously. In th&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10948v1-abstract-full').style.display = 'inline'; document.getElementById('2411.10948v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.10948v1-abstract-full" style="display: none;"> Neural network training is a memory- and compute-intensive task. Quantization, which enables low-bitwidth formats in training, can significantly mitigate the workload. To reduce quantization error, recent methods have developed new data formats and additional pre-processing operations on quantizers. However, it remains quite challenging to achieve high accuracy and efficiency simultaneously. In this paper, we explore sub-8-bit integer training from its essence of gradient descent optimization. Our integer training framework includes two components: ShiftQuant to realize accurate gradient estimation, and L1 normalization to smoothen the loss landscape. ShiftQuant attains performance that approaches the theoretical upper bound of group quantization. Furthermore, it liberates group quantization from inefficient memory rearrangement. The L1 normalization facilitates the implementation of fully quantized normalization layers with impressive convergence accuracy. Our method frees sub-8-bit integer training from pre-processing and supports general devices. This framework achieves negligible accuracy loss across various neural networks and tasks ($0.92\%$ on 4-bit ResNets, $0.61\%$ on 6-bit Transformers). The prototypical implementation of ShiftQuant achieves more than $1.85\times/15.3\%$ performance improvement on CPU/GPU compared to its FP16 counterparts, and $33.9\%$ resource consumption reduction on FPGA than the FP16 counterparts. The proposed fully-quantized L1 normalization layers achieve more than $35.54\%$ improvement in throughout on CPU compared to traditional L2 normalization layers. Moreover, theoretical analysis verifies the advancement of our method. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10948v1-abstract-full').style.display = 'none'; document.getElementById('2411.10948v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.07228">arXiv:2411.07228</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.07228">pdf</a>, <a href="https://arxiv.org/format/2411.07228">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> </div> </div> <p class="title is-5 mathjax"> Tooling or Not Tooling? The Impact of Tools on Language Agents for Chemistry Problem Solving </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Yu%2C+B">Botao Yu</a>, <a href="/search/?searchtype=author&amp;query=Baker%2C+F+N">Frazier N. Baker</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+Z">Ziru Chen</a>, <a href="/search/?searchtype=author&amp;query=Herb%2C+G">Garrett Herb</a>, <a href="/search/?searchtype=author&amp;query=Gou%2C+B">Boyu Gou</a>, <a href="/search/?searchtype=author&amp;query=Adu-Ampratwum%2C+D">Daniel Adu-Ampratwum</a>, <a href="/search/?searchtype=author&amp;query=Ning%2C+X">Xia Ning</a>, <a href="/search/?searchtype=author&amp;query=Sun%2C+H">Huan Sun</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.07228v1-abstract-short" style="display: inline;"> To enhance large language models (LLMs) for chemistry problem solving, several LLM-based agents augmented with tools have been proposed, such as ChemCrow and Coscientist. However, their evaluations are narrow in scope, leaving a large gap in understanding the benefits of tools across diverse chemistry tasks. To bridge this gap, we develop ChemAgent, an enhanced chemistry agent over ChemCrow, and c&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07228v1-abstract-full').style.display = 'inline'; document.getElementById('2411.07228v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.07228v1-abstract-full" style="display: none;"> To enhance large language models (LLMs) for chemistry problem solving, several LLM-based agents augmented with tools have been proposed, such as ChemCrow and Coscientist. However, their evaluations are narrow in scope, leaving a large gap in understanding the benefits of tools across diverse chemistry tasks. To bridge this gap, we develop ChemAgent, an enhanced chemistry agent over ChemCrow, and conduct a comprehensive evaluation of its performance on both specialized chemistry tasks and general chemistry questions. Surprisingly, ChemAgent does not consistently outperform its base LLMs without tools. Our error analysis with a chemistry expert suggests that: For specialized chemistry tasks, such as synthesis prediction, we should augment agents with specialized tools; however, for general chemistry questions like those in exams, agents&#39; ability to reason correctly with chemistry knowledge matters more, and tool augmentation does not always help. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07228v1-abstract-full').style.display = 'none'; document.getElementById('2411.07228v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.06091">arXiv:2411.06091</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.06091">pdf</a>, <a href="https://arxiv.org/format/2411.06091">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Pattern Integration and Enhancement Vision Transformer for Self-Supervised Learning in Remote Sensing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Lu%2C+K">Kaixuan Lu</a>, <a href="/search/?searchtype=author&amp;query=Zhang%2C+R">Ruiqian Zhang</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+X">Xiao Huang</a>, <a href="/search/?searchtype=author&amp;query=Xie%2C+Y">Yuxing Xie</a>, <a href="/search/?searchtype=author&amp;query=Ning%2C+X">Xiaogang Ning</a>, <a href="/search/?searchtype=author&amp;query=Zhang%2C+H">Hanchao Zhang</a>, <a href="/search/?searchtype=author&amp;query=Yuan%2C+M">Mengke Yuan</a>, <a href="/search/?searchtype=author&amp;query=Zhang%2C+P">Pan Zhang</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+T">Tao Wang</a>, <a href="/search/?searchtype=author&amp;query=Liao%2C+T">Tongkui Liao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.06091v1-abstract-short" style="display: inline;"> Recent self-supervised learning (SSL) methods have demonstrated impressive results in learning visual representations from unlabeled remote sensing images. However, most remote sensing images predominantly consist of scenographic scenes containing multiple ground objects without explicit foreground targets, which limits the performance of existing SSL methods that focus on foreground targets. This&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06091v1-abstract-full').style.display = 'inline'; document.getElementById('2411.06091v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.06091v1-abstract-full" style="display: none;"> Recent self-supervised learning (SSL) methods have demonstrated impressive results in learning visual representations from unlabeled remote sensing images. However, most remote sensing images predominantly consist of scenographic scenes containing multiple ground objects without explicit foreground targets, which limits the performance of existing SSL methods that focus on foreground targets. This raises the question: Is there a method that can automatically aggregate similar objects within scenographic remote sensing images, thereby enabling models to differentiate knowledge embedded in various geospatial patterns for improved feature representation? In this work, we present the Pattern Integration and Enhancement Vision Transformer (PIEViT), a novel self-supervised learning framework designed specifically for remote sensing imagery. PIEViT utilizes a teacher-student architecture to address both image-level and patch-level tasks. It employs the Geospatial Pattern Cohesion (GPC) module to explore the natural clustering of patches, enhancing the differentiation of individual features. The Feature Integration Projection (FIP) module further refines masked token reconstruction using geospatially clustered patches. We validated PIEViT across multiple downstream tasks, including object detection, semantic segmentation, and change detection. Experiments demonstrated that PIEViT enhances the representation of internal patch features, providing significant improvements over existing self-supervised baselines. It achieves excellent results in object detection, land cover classification, and change detection, underscoring its robustness, generalization, and transferability for remote sensing image interpretation tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06091v1-abstract-full').style.display = 'none'; document.getElementById('2411.06091v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.03320">arXiv:2411.03320</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.03320">pdf</a>, <a href="https://arxiv.org/format/2411.03320">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> log-RRIM: Yield Prediction via Local-to-global Reaction Representation Learning and Interaction Modeling </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Hu%2C+X">Xiao Hu</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+Z">Ziqi Chen</a>, <a href="/search/?searchtype=author&amp;query=Peng%2C+B">Bo Peng</a>, <a href="/search/?searchtype=author&amp;query=Adu-Ampratwum%2C+D">Daniel Adu-Ampratwum</a>, <a href="/search/?searchtype=author&amp;query=Ning%2C+X">Xia Ning</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.03320v3-abstract-short" style="display: inline;"> Accurate prediction of chemical reaction yields is crucial for optimizing organic synthesis, potentially reducing time and resources spent on experimentation. With the rise of artificial intelligence (AI), there is growing interest in leveraging AI-based methods to accelerate yield predictions without conducting in vitro experiments. We present log-RRIM, an innovative graph transformer-based frame&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.03320v3-abstract-full').style.display = 'inline'; document.getElementById('2411.03320v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.03320v3-abstract-full" style="display: none;"> Accurate prediction of chemical reaction yields is crucial for optimizing organic synthesis, potentially reducing time and resources spent on experimentation. With the rise of artificial intelligence (AI), there is growing interest in leveraging AI-based methods to accelerate yield predictions without conducting in vitro experiments. We present log-RRIM, an innovative graph transformer-based framework designed for predicting chemical reaction yields. Our approach implements a unique local-to-global reaction representation learning strategy. This approach initially captures detailed molecule-level information and then models and aggregates intermolecular interactions, ensuring that the impact of varying-sizes molecular fragments on yield is accurately accounted for. Another key feature of log-RRIM is its integration of a cross-attention mechanism that focuses on the interplay between reagents and reaction centers. This design reflects a fundamental principle in chemical reactions: the crucial role of reagents in influencing bond-breaking and formation processes, which ultimately affect reaction yields. log-RRIM outperforms existing methods in our experiments, especially for medium to high-yielding reactions, proving its reliability as a predictor. Its advanced modeling of reactant-reagent interactions and sensitivity to small molecular fragments make it a valuable tool for reaction planning and optimization in chemical synthesis. The data and codes of log-RRIM are accessible through https://github.com/ninglab/Yield_log_RRIM. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.03320v3-abstract-full').style.display = 'none'; document.getElementById('2411.03320v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">18 pages, 8 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.19239">arXiv:2410.19239</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.19239">pdf</a>, <a href="https://arxiv.org/format/2410.19239">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Prompting Continual Person Search </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Zhang%2C+P">Pengcheng Zhang</a>, <a href="/search/?searchtype=author&amp;query=Yu%2C+X">Xiaohan Yu</a>, <a href="/search/?searchtype=author&amp;query=Bai%2C+X">Xiao Bai</a>, <a href="/search/?searchtype=author&amp;query=Zheng%2C+J">Jin Zheng</a>, <a href="/search/?searchtype=author&amp;query=Ning%2C+X">Xin Ning</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.19239v1-abstract-short" style="display: inline;"> The development of person search techniques has been greatly promoted in recent years for its superior practicality and challenging goals. Despite their significant progress, existing person search models still lack the ability to continually learn from increaseing real-world data and adaptively process input from different domains. To this end, this work introduces the continual person search tas&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.19239v1-abstract-full').style.display = 'inline'; document.getElementById('2410.19239v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.19239v1-abstract-full" style="display: none;"> The development of person search techniques has been greatly promoted in recent years for its superior practicality and challenging goals. Despite their significant progress, existing person search models still lack the ability to continually learn from increaseing real-world data and adaptively process input from different domains. To this end, this work introduces the continual person search task that sequentially learns on multiple domains and then performs person search on all seen domains. This requires balancing the stability and plasticity of the model to continually learn new knowledge without catastrophic forgetting. For this, we propose a Prompt-based Continual Person Search (PoPS) model in this paper. First, we design a compositional person search transformer to construct an effective pre-trained transformer without exhaustive pre-training from scratch on large-scale person search data. This serves as the fundamental for prompt-based continual learning. On top of that, we design a domain incremental prompt pool with a diverse attribute matching module. For each domain, we independently learn a set of prompts to encode the domain-oriented knowledge. Meanwhile, we jointly learn a group of diverse attribute projections and prototype embeddings to capture discriminative domain attributes. By matching an input image with the learned attributes across domains, the learned prompts can be properly selected for model inference. Extensive experiments are conducted to validate the proposed method for continual person search. The source code is available at https://github.com/PatrickZad/PoPS. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.19239v1-abstract-full').style.display = 'none'; document.getElementById('2410.19239v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ACM MM 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.17337">arXiv:2410.17337</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.17337">pdf</a>, <a href="https://arxiv.org/format/2410.17337">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Captions Speak Louder than Images (CASLIE): Generalizing Foundation Models for E-commerce from High-quality Multimodal Instruction Data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Ling%2C+X">Xinyi Ling</a>, <a href="/search/?searchtype=author&amp;query=Peng%2C+B">Bo Peng</a>, <a href="/search/?searchtype=author&amp;query=Du%2C+H">Hanwen Du</a>, <a href="/search/?searchtype=author&amp;query=Zhu%2C+Z">Zhihui Zhu</a>, <a href="/search/?searchtype=author&amp;query=Ning%2C+X">Xia Ning</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.17337v1-abstract-short" style="display: inline;"> Leveraging multimodal data to drive breakthroughs in e-commerce applications through Multimodal Foundation Models (MFMs) is gaining increasing attention from the research community. However, there are significant challenges that hinder the optimal use of multimodal e-commerce data by foundation models: (1) the scarcity of large-scale, high-quality multimodal benchmark datasets; and (2) the lack of&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.17337v1-abstract-full').style.display = 'inline'; document.getElementById('2410.17337v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.17337v1-abstract-full" style="display: none;"> Leveraging multimodal data to drive breakthroughs in e-commerce applications through Multimodal Foundation Models (MFMs) is gaining increasing attention from the research community. However, there are significant challenges that hinder the optimal use of multimodal e-commerce data by foundation models: (1) the scarcity of large-scale, high-quality multimodal benchmark datasets; and (2) the lack of effective multimodal information integration methods. To address these challenges, in this paper, we introduce MMECInstruct, the first-ever, large-scale, and high-quality multimodal instruction dataset for e-commerce. We also develop CASLIE, a simple, lightweight, yet effective framework for integrating multimodal information for e-commerce. Leveraging MMECInstruct, we fine-tune a series of e-commerce MFMs within CASLIE, denoted as CASLIE models. Our comprehensive evaluation demonstrates that CASLIE models substantially outperform 5 categories of advanced baseline models in the in-domain evaluation. Moreover, CASLIE models show strong generalizability to out-of-domain settings. MMECInstruct and CASLIE models are publicly accessible through https://ninglab.github.io/CASLIE/. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.17337v1-abstract-full').style.display = 'none'; document.getElementById('2410.17337v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Xinyi Ling and Bo Peng contributed equally to this paper</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.09580">arXiv:2410.09580</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.09580">pdf</a>, <a href="https://arxiv.org/format/2410.09580">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> SAPIENT: Mastering Multi-turn Conversational Recommendation with Strategic Planning and Monte Carlo Tree Search </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Du%2C+H">Hanwen Du</a>, <a href="/search/?searchtype=author&amp;query=Peng%2C+B">Bo Peng</a>, <a href="/search/?searchtype=author&amp;query=Ning%2C+X">Xia Ning</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.09580v1-abstract-short" style="display: inline;"> Conversational Recommender Systems (CRS) proactively engage users in interactive dialogues to elicit user preferences and provide personalized recommendations. Existing methods train Reinforcement Learning (RL)-based agent with greedy action selection or sampling strategy, and may suffer from suboptimal conversational planning. To address this, we present a novel Monte Carlo Tree Search (MCTS)-bas&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.09580v1-abstract-full').style.display = 'inline'; document.getElementById('2410.09580v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.09580v1-abstract-full" style="display: none;"> Conversational Recommender Systems (CRS) proactively engage users in interactive dialogues to elicit user preferences and provide personalized recommendations. Existing methods train Reinforcement Learning (RL)-based agent with greedy action selection or sampling strategy, and may suffer from suboptimal conversational planning. To address this, we present a novel Monte Carlo Tree Search (MCTS)-based CRS framework SAPIENT. SAPIENT consists of a conversational agent (S-agent) and a conversational planner (S-planner). S-planner builds a conversational search tree with MCTS based on the initial actions proposed by S-agent to find conversation plans. The best conversation plans from S-planner are used to guide the training of S-agent, creating a self-training loop where S-agent can iteratively improve its capability for conversational planning. Furthermore, we propose an efficient variant SAPIENT-e for trade-off between training efficiency and performance. Extensive experiments on four benchmark datasets validate the effectiveness of our approach, showing that SAPIENT outperforms the state-of-the-art baselines. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.09580v1-abstract-full').style.display = 'none'; document.getElementById('2410.09580v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.06664">arXiv:2410.06664</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.06664">pdf</a>, <a href="https://arxiv.org/format/2410.06664">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Decouple-Then-Merge: Towards Better Training for Diffusion Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Ma%2C+Q">Qianli Ma</a>, <a href="/search/?searchtype=author&amp;query=Ning%2C+X">Xuefei Ning</a>, <a href="/search/?searchtype=author&amp;query=Liu%2C+D">Dongrui Liu</a>, <a href="/search/?searchtype=author&amp;query=Niu%2C+L">Li Niu</a>, <a href="/search/?searchtype=author&amp;query=Zhang%2C+L">Linfeng Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.06664v1-abstract-short" style="display: inline;"> Diffusion models are trained by learning a sequence of models that reverse each step of noise corruption. Typically, the model parameters are fully shared across multiple timesteps to enhance training efficiency. However, since the denoising tasks differ at each timestep, the gradients computed at different timesteps may conflict, potentially degrading the overall performance of image generation.&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.06664v1-abstract-full').style.display = 'inline'; document.getElementById('2410.06664v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.06664v1-abstract-full" style="display: none;"> Diffusion models are trained by learning a sequence of models that reverse each step of noise corruption. Typically, the model parameters are fully shared across multiple timesteps to enhance training efficiency. However, since the denoising tasks differ at each timestep, the gradients computed at different timesteps may conflict, potentially degrading the overall performance of image generation. To solve this issue, this work proposes a Decouple-then-Merge (DeMe) framework, which begins with a pretrained model and finetunes separate models tailored to specific timesteps. We introduce several improved techniques during the finetuning stage to promote effective knowledge sharing while minimizing training interference across timesteps. Finally, after finetuning, these separate models can be merged into a single model in the parameter space, ensuring efficient and practical inference. Experimental results show significant generation quality improvements upon 6 benchmarks including Stable Diffusion on COCO30K, ImageNet1K, PartiPrompts, and DDPM on LSUN Church, LSUN Bedroom, and CIFAR10. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.06664v1-abstract-full').style.display = 'none'; document.getElementById('2410.06664v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.05080">arXiv:2410.05080</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.05080">pdf</a>, <a href="https://arxiv.org/format/2410.05080">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> ScienceAgentBench: Toward Rigorous Assessment of Language Agents for Data-Driven Scientific Discovery </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Chen%2C+Z">Ziru Chen</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+S">Shijie Chen</a>, <a href="/search/?searchtype=author&amp;query=Ning%2C+Y">Yuting Ning</a>, <a href="/search/?searchtype=author&amp;query=Zhang%2C+Q">Qianheng Zhang</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+B">Boshi Wang</a>, <a href="/search/?searchtype=author&amp;query=Yu%2C+B">Botao Yu</a>, <a href="/search/?searchtype=author&amp;query=Li%2C+Y">Yifei Li</a>, <a href="/search/?searchtype=author&amp;query=Liao%2C+Z">Zeyi Liao</a>, <a href="/search/?searchtype=author&amp;query=Wei%2C+C">Chen Wei</a>, <a href="/search/?searchtype=author&amp;query=Lu%2C+Z">Zitong Lu</a>, <a href="/search/?searchtype=author&amp;query=Dey%2C+V">Vishal Dey</a>, <a href="/search/?searchtype=author&amp;query=Xue%2C+M">Mingyi Xue</a>, <a href="/search/?searchtype=author&amp;query=Baker%2C+F+N">Frazier N. Baker</a>, <a href="/search/?searchtype=author&amp;query=Burns%2C+B">Benjamin Burns</a>, <a href="/search/?searchtype=author&amp;query=Adu-Ampratwum%2C+D">Daniel Adu-Ampratwum</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+X">Xuhui Huang</a>, <a href="/search/?searchtype=author&amp;query=Ning%2C+X">Xia Ning</a>, <a href="/search/?searchtype=author&amp;query=Gao%2C+S">Song Gao</a>, <a href="/search/?searchtype=author&amp;query=Su%2C+Y">Yu Su</a>, <a href="/search/?searchtype=author&amp;query=Sun%2C+H">Huan Sun</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.05080v2-abstract-short" style="display: inline;"> The advancements of language language models (LLMs) have piqued growing interest in developing LLM-based language agents to automate scientific discovery end-to-end, which has sparked both excitement and skepticism about their true capabilities. In this work, we call for rigorous assessment of agents on individual tasks in a scientific workflow before making bold claims on end-to-end automation. T&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.05080v2-abstract-full').style.display = 'inline'; document.getElementById('2410.05080v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.05080v2-abstract-full" style="display: none;"> The advancements of language language models (LLMs) have piqued growing interest in developing LLM-based language agents to automate scientific discovery end-to-end, which has sparked both excitement and skepticism about their true capabilities. In this work, we call for rigorous assessment of agents on individual tasks in a scientific workflow before making bold claims on end-to-end automation. To ensure the scientific authenticity and real-world relevance of our benchmark, we extract 102 tasks from 44 peer-reviewed publications in four disciplines and engage nine subject matter experts to validate them. We unify the target output for every task to a self-contained Python program file and employ an array of evaluation metrics to examine the generated programs, execution results, and costs. Each task goes through multiple rounds of manual validation by annotators and subject matter experts to ensure its annotation quality and scientific plausibility. We also propose two effective strategies to mitigate data contamination concerns. Using our benchmark, we evaluate five open-weight and proprietary LLMs, each with three frameworks: direct prompting, OpenHands CodeAct, and self-debug. Given three attempts for each task, the best-performing agent can only solve 32.4% of the tasks independently and 34.3% with expert-provided knowledge. In addition, we evaluate OpenAI o1 with direct prompting and self-debug, which demonstrates the effectiveness of increasing inference-time compute. Still, our results underscore the limitations of current language agents in generating code for data-driven discovery, let alone end-to-end automation for scientific research. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.05080v2-abstract-full').style.display = 'none'; document.getElementById('2410.05080v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">57 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.04603">arXiv:2410.04603</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.04603">pdf</a>, <a href="https://arxiv.org/format/2410.04603">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Instrumentation and Detectors">physics.ins-det</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="High Energy Physics - Experiment">hep-ex</span> </div> </div> <p class="title is-5 mathjax"> Self-compensating Light Calorimetry with Liquid Argon Time Projection Chamber for GeV Neutrino Physics </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Ning%2C+X">Xuyang Ning</a>, <a href="/search/?searchtype=author&amp;query=Shi%2C+W">Wei Shi</a>, <a href="/search/?searchtype=author&amp;query=Zhang%2C+C">Chao Zhang</a>, <a href="/search/?searchtype=author&amp;query=Riccio%2C+C">Ciro Riccio</a>, <a href="/search/?searchtype=author&amp;query=Jo%2C+J+H">Jay Hyun Jo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.04603v1-abstract-short" style="display: inline;"> Liquid Argon Time Projection Chamber (LArTPC) is an exceptional dual calorimeter capable of estimating the energy of incident particles through both the ionization charge and the scintillation light. Our studies show that due to the mechanisms of charge recombination and light generation involved in the energy dissipation in liquid argon, light calorimetry in LArTPCs is inherently self-compensatin&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04603v1-abstract-full').style.display = 'inline'; document.getElementById('2410.04603v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.04603v1-abstract-full" style="display: none;"> Liquid Argon Time Projection Chamber (LArTPC) is an exceptional dual calorimeter capable of estimating the energy of incident particles through both the ionization charge and the scintillation light. Our studies show that due to the mechanisms of charge recombination and light generation involved in the energy dissipation in liquid argon, light calorimetry in LArTPCs is inherently self-compensating: the missing energy in the hadronic component is compensated for by the extra recombination luminescence compared to the electromagnetic component. Good compensation of the electron-to-hadron response ratio (e/h) around unity can be achieved across a broad range of drift electric fields from 0.2 to 1.8 kV/cm.This inherent self-compensation enhances the appeal of light calorimetry in LArTPCs, complementing the well-established charge calorimetry. Using GeV neutrinos as a case study, we show that light calorimetry can achieve an energy resolution comparable to the more sophisticated charge imaging calorimetry. The synergy between light and charge calorimetry offers a novel approach to evaluating and mitigating systematic uncertainties in energy measurements with LArTPCs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04603v1-abstract-full').style.display = 'none'; document.getElementById('2410.04603v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">15 pages, 11 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.01699">arXiv:2410.01699</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.01699">pdf</a>, <a href="https://arxiv.org/format/2410.01699">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Accelerating Auto-regressive Text-to-Image Generation with Training-free Speculative Jacobi Decoding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Teng%2C+Y">Yao Teng</a>, <a href="/search/?searchtype=author&amp;query=Shi%2C+H">Han Shi</a>, <a href="/search/?searchtype=author&amp;query=Liu%2C+X">Xian Liu</a>, <a href="/search/?searchtype=author&amp;query=Ning%2C+X">Xuefei Ning</a>, <a href="/search/?searchtype=author&amp;query=Dai%2C+G">Guohao Dai</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+Y">Yu Wang</a>, <a href="/search/?searchtype=author&amp;query=Li%2C+Z">Zhenguo Li</a>, <a href="/search/?searchtype=author&amp;query=Liu%2C+X">Xihui Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.01699v1-abstract-short" style="display: inline;"> The current large auto-regressive models can generate high-quality, high-resolution images, but these models require hundreds or even thousands of steps of next-token prediction during inference, resulting in substantial time consumption. In existing studies, Jacobi decoding, an iterative parallel decoding algorithm, has been used to accelerate the auto-regressive generation and can be executed wi&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.01699v1-abstract-full').style.display = 'inline'; document.getElementById('2410.01699v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.01699v1-abstract-full" style="display: none;"> The current large auto-regressive models can generate high-quality, high-resolution images, but these models require hundreds or even thousands of steps of next-token prediction during inference, resulting in substantial time consumption. In existing studies, Jacobi decoding, an iterative parallel decoding algorithm, has been used to accelerate the auto-regressive generation and can be executed without training. However, the Jacobi decoding relies on a deterministic criterion to determine the convergence of iterations. Thus, it works for greedy decoding but is incompatible with sampling-based decoding which is crucial for visual quality and diversity in the current auto-regressive text-to-image generation. In this paper, we propose a training-free probabilistic parallel decoding algorithm, Speculative Jacobi Decoding (SJD), to accelerate auto-regressive text-to-image generation. By introducing a probabilistic convergence criterion, our SJD accelerates the inference of auto-regressive text-to-image generation while maintaining the randomness in sampling-based token decoding and allowing the model to generate diverse images. Specifically, SJD facilitates the model to predict multiple tokens at each step and accepts tokens based on the probabilistic criterion, enabling the model to generate images with fewer steps than the conventional next-token-prediction paradigm. We also investigate the token initialization strategies that leverage the spatial locality of visual data to further improve the acceleration ratio under specific scenarios. We conduct experiments for our proposed SJD on multiple auto-regressive text-to-image generation models, showing the effectiveness of model acceleration without sacrificing the visual quality. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.01699v1-abstract-full').style.display = 'none'; document.getElementById('2410.01699v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.10593">arXiv:2409.10593</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.10593">pdf</a>, <a href="https://arxiv.org/format/2409.10593">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> CSKV: Training-Efficient Channel Shrinking for KV Cache in Long-Context Scenarios </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Wang%2C+L">Luning Wang</a>, <a href="/search/?searchtype=author&amp;query=Li%2C+S">Shiyao Li</a>, <a href="/search/?searchtype=author&amp;query=Ning%2C+X">Xuefei Ning</a>, <a href="/search/?searchtype=author&amp;query=Yuan%2C+Z">Zhihang Yuan</a>, <a href="/search/?searchtype=author&amp;query=Yan%2C+S">Shengen Yan</a>, <a href="/search/?searchtype=author&amp;query=Dai%2C+G">Guohao Dai</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+Y">Yu Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.10593v3-abstract-short" style="display: inline;"> Large Language Models (LLMs) have been widely adopted to process long-context tasks. However, the large memory overhead of the key-value (KV) cache poses significant challenges in long-context scenarios. Existing training-free KV cache compression methods typically focus on quantization and token pruning, which have compression limits, and excessive sparsity can lead to severe performance degradat&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.10593v3-abstract-full').style.display = 'inline'; document.getElementById('2409.10593v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.10593v3-abstract-full" style="display: none;"> Large Language Models (LLMs) have been widely adopted to process long-context tasks. However, the large memory overhead of the key-value (KV) cache poses significant challenges in long-context scenarios. Existing training-free KV cache compression methods typically focus on quantization and token pruning, which have compression limits, and excessive sparsity can lead to severe performance degradation. Other methods design new architectures with less KV overhead but require significant training overhead. To address the above two drawbacks, we further explore the redundancy in the channel dimension and apply an architecture-level design with minor training costs. Therefore, we introduce CSKV, a training-efficient Channel Shrinking technique for KV cache compression: (1) We first analyze the singular value distribution of the KV cache, revealing significant redundancy and compression potential along the channel dimension. Based on this observation, we propose using low-rank decomposition for key and value layers and storing the low-dimension features. (2) To preserve model performance, we introduce a bi-branch KV cache, including a window-based full-precision KV cache and a low-precision compressed KV cache. (3) To reduce the training costs, we minimize the layer-wise reconstruction loss for the compressed KV cache instead of retraining the entire LLMs. Extensive experiments show that CSKV can reduce the memory overhead of the KV cache by 80% while maintaining the model&#39;s long-context capability. Moreover, we show that our method can be seamlessly combined with quantization to further reduce the memory overhead, achieving a compression ratio of up to 95%. Code is available at https://github.com/wln20/CSKV. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.10593v3-abstract-full').style.display = 'none'; document.getElementById('2409.10593v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">4th NeurIPS Efficient Natural Language and Speech Processing Workshop (ENLSP-IV 2024)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.00773">arXiv:2409.00773</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.00773">pdf</a>, <a href="https://arxiv.org/format/2409.00773">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="High Energy Physics - Experiment">hep-ex</span> </div> </div> <p class="title is-5 mathjax"> Searching for MeV-scale Axion-like Particles and Dark Photons with PandaX-4T </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=PandaX+Collaboration"> PandaX Collaboration</a>, <a href="/search/?searchtype=author&amp;query=Li%2C+T">Tao Li</a>, <a href="/search/?searchtype=author&amp;query=Bo%2C+Z">Zihao Bo</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+W">Wei Chen</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+X">Xun Chen</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+Y">Yunhua Chen</a>, <a href="/search/?searchtype=author&amp;query=Cheng%2C+Z">Zhaokan Cheng</a>, <a href="/search/?searchtype=author&amp;query=Cui%2C+X">Xiangyi Cui</a>, <a href="/search/?searchtype=author&amp;query=Fan%2C+Y">Yingjie Fan</a>, <a href="/search/?searchtype=author&amp;query=Fang%2C+D">Deqing Fang</a>, <a href="/search/?searchtype=author&amp;query=Gao%2C+Z">Zhixing Gao</a>, <a href="/search/?searchtype=author&amp;query=Geng%2C+L">Lisheng Geng</a>, <a href="/search/?searchtype=author&amp;query=Giboni%2C+K">Karl Giboni</a>, <a href="/search/?searchtype=author&amp;query=Guo%2C+X">Xunan Guo</a>, <a href="/search/?searchtype=author&amp;query=Guo%2C+X">Xuyuan Guo</a>, <a href="/search/?searchtype=author&amp;query=Guo%2C+Z">Zichao Guo</a>, <a href="/search/?searchtype=author&amp;query=Han%2C+C">Chencheng Han</a>, <a href="/search/?searchtype=author&amp;query=He%2C+K+H">Ke HanChangda He</a>, <a href="/search/?searchtype=author&amp;query=He%2C+J">Jinrong He</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+D">Di Huang</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+H">Houqi Huang</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+J">Junting Huang</a>, <a href="/search/?searchtype=author&amp;query=Hou%2C+R">Ruquan Hou</a>, <a href="/search/?searchtype=author&amp;query=Hou%2C+Y">Yu Hou</a>, <a href="/search/?searchtype=author&amp;query=Ji%2C+X">Xiangdong Ji</a> , et al. (76 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.00773v1-abstract-short" style="display: inline;"> Axion-like particles (ALPs) and dark photons (DPs) are viable dark matter particle candidates. We have searched for possible ALP/DP signals in the PandaX-4T liquid xenon detector using 94.8 days of data. A binned likelihood fit is constructed to search for possible mono-energetic peaks induced by the absorption processes between ALPs/DPs and atomic electrons of xenon. A detailed temporal model of&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.00773v1-abstract-full').style.display = 'inline'; document.getElementById('2409.00773v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.00773v1-abstract-full" style="display: none;"> Axion-like particles (ALPs) and dark photons (DPs) are viable dark matter particle candidates. We have searched for possible ALP/DP signals in the PandaX-4T liquid xenon detector using 94.8 days of data. A binned likelihood fit is constructed to search for possible mono-energetic peaks induced by the absorption processes between ALPs/DPs and atomic electrons of xenon. A detailed temporal model of decays associated with xenon isotopes is introduced to constrain the number of background events. No signal excess over background expectations is observed, and we have established the most stringent exclusion limits for most ALP/DP masses ranging from 150 keV/$c^2$ to 1 MeV/$c^2$. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.00773v1-abstract-full').style.display = 'none'; document.getElementById('2409.00773v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.09158">arXiv:2408.09158</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.09158">pdf</a>, <a href="https://arxiv.org/format/2408.09158">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Linear Attention is Enough in Spatial-Temporal Forecasting </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Ning%2C+X">Xinyu Ning</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.09158v2-abstract-short" style="display: inline;"> As the most representative scenario of spatial-temporal forecasting tasks, the traffic forecasting task attracted numerous attention from machine learning community due to its intricate correlation both in space and time dimension. Existing methods often treat road networks over time as spatial-temporal graphs, addressing spatial and temporal representations independently. However, these approache&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.09158v2-abstract-full').style.display = 'inline'; document.getElementById('2408.09158v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.09158v2-abstract-full" style="display: none;"> As the most representative scenario of spatial-temporal forecasting tasks, the traffic forecasting task attracted numerous attention from machine learning community due to its intricate correlation both in space and time dimension. Existing methods often treat road networks over time as spatial-temporal graphs, addressing spatial and temporal representations independently. However, these approaches struggle to capture the dynamic topology of road networks, encounter issues with message passing mechanisms and over-smoothing, and face challenges in learning spatial and temporal relationships separately. To address these limitations, we propose treating nodes in road networks at different time steps as independent spatial-temporal tokens and feeding them into a vanilla Transformer to learn complex spatial-temporal patterns, design \textbf{STformer} achieving SOTA. Given its quadratic complexity, we introduce a variant \textbf{NSTformer} based on Nystr$\ddot{o}$m method to approximate self-attention with linear complexity but even slightly better than former in a few cases astonishingly. Extensive experimental results on traffic datasets demonstrate that the proposed method achieves state-of-the-art performance at an affordable computational cost. Our code is available at \href{https://github.com/XinyuNing/STformer-and-NSTformer}{https://github.com/XinyuNing/STformer-and-NSTformer}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.09158v2-abstract-full').style.display = 'none'; document.getElementById('2408.09158v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.08491">arXiv:2408.08491</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.08491">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Applied Physics">physics.app-ph</span> </div> </div> <p class="title is-5 mathjax"> Multifunctional Bistable Ultrathin Composite Booms with Flexible Electronics </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Yao%2C+Y">Yao Yao</a>, <a href="/search/?searchtype=author&amp;query=Fernandez%2C+J+M">Juan M. Fernandez</a>, <a href="/search/?searchtype=author&amp;query=Bilen%2C+S+G">Sven G. Bilen</a>, <a href="/search/?searchtype=author&amp;query=Ning%2C+X">Xin Ning</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.08491v1-abstract-short" style="display: inline;"> Small satellites such as CubeSats pose demanding requirements on the weight, size, and multifunctionality of their structures due to extreme constraints on the payload mass and volume. To address this challenge, we introduce a concept of multifunctional deployable space structures for CubeSats based on ultrathin, elastically foldable, and self-deployable bistable composite structures integrated wi&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.08491v1-abstract-full').style.display = 'inline'; document.getElementById('2408.08491v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.08491v1-abstract-full" style="display: none;"> Small satellites such as CubeSats pose demanding requirements on the weight, size, and multifunctionality of their structures due to extreme constraints on the payload mass and volume. To address this challenge, we introduce a concept of multifunctional deployable space structures for CubeSats based on ultrathin, elastically foldable, and self-deployable bistable composite structures integrated with flexible electronics. The multifunctional bistable booms can be stored in a coiled configuration and self-deploy into a long structure upon initiation by releasing the stored strain energy. The boom demonstrates the capabilities of delivering power and transmitting data from the CubeSat to the flexible devices on the boom tip. The boom also shows the ability to monitor the dynamics and vibration during and after the deployment. A payload boom has been installed in a 3U CubeSat as flight hardware for in-space testing and demonstration. This effort combines morphable ultrathin composite structures with flexible electronics. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.08491v1-abstract-full').style.display = 'none'; document.getElementById('2408.08491v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.07641">arXiv:2408.07641</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.07641">pdf</a>, <a href="https://arxiv.org/format/2408.07641">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="High Energy Physics - Experiment">hep-ex</span> </div> </div> <p class="title is-5 mathjax"> Exploring New Physics with PandaX-4T Low Energy Electronic Recoil Data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=PandaX+Collaboration"> PandaX Collaboration</a>, <a href="/search/?searchtype=author&amp;query=Zeng%2C+X">Xinning Zeng</a>, <a href="/search/?searchtype=author&amp;query=Bo%2C+Z">Zihao Bo</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+W">Wei Chen</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+X">Xun Chen</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+Y">Yunhua Chen</a>, <a href="/search/?searchtype=author&amp;query=Cheng%2C+Z">Zhaokan Cheng</a>, <a href="/search/?searchtype=author&amp;query=Cui%2C+X">Xiangyi Cui</a>, <a href="/search/?searchtype=author&amp;query=Fan%2C+Y">Yingjie Fan</a>, <a href="/search/?searchtype=author&amp;query=Fang%2C+D">Deqing Fang</a>, <a href="/search/?searchtype=author&amp;query=Gao%2C+Z">Zhixing Gao</a>, <a href="/search/?searchtype=author&amp;query=Geng%2C+L">Lisheng Geng</a>, <a href="/search/?searchtype=author&amp;query=Giboni%2C+K">Karl Giboni</a>, <a href="/search/?searchtype=author&amp;query=Guo%2C+X">Xunan Guo</a>, <a href="/search/?searchtype=author&amp;query=Guo%2C+X">Xuyuan Guo</a>, <a href="/search/?searchtype=author&amp;query=Guo%2C+Z">Zichao Guo</a>, <a href="/search/?searchtype=author&amp;query=Han%2C+C">Chencheng Han</a>, <a href="/search/?searchtype=author&amp;query=He%2C+K+H">Ke HanChangda He</a>, <a href="/search/?searchtype=author&amp;query=He%2C+J">Jinrong He</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+D">Di Huang</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+H">Houqi Huang</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+J">Junting Huang</a>, <a href="/search/?searchtype=author&amp;query=Hou%2C+R">Ruquan Hou</a>, <a href="/search/?searchtype=author&amp;query=Hou%2C+Y">Yu Hou</a>, <a href="/search/?searchtype=author&amp;query=Ji%2C+X">Xiangdong Ji</a> , et al. (76 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.07641v1-abstract-short" style="display: inline;"> New particles beyond the Standard Model of particle physics, such as axions, can be effectively searched through their interactions with electrons. We use the large liquid xenon detector PandaX-4T to search for novel electronic recoil signals induced by solar axions, neutrinos with anomalous magnetic moment, axion-like particles, dark photons, and light fermionic dark matter. A detailed background&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.07641v1-abstract-full').style.display = 'inline'; document.getElementById('2408.07641v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.07641v1-abstract-full" style="display: none;"> New particles beyond the Standard Model of particle physics, such as axions, can be effectively searched through their interactions with electrons. We use the large liquid xenon detector PandaX-4T to search for novel electronic recoil signals induced by solar axions, neutrinos with anomalous magnetic moment, axion-like particles, dark photons, and light fermionic dark matter. A detailed background model is established with the latest datasets with 1.54 $\rm tonne \cdot year$ exposure. No significant excess above the background has been observed, and we have obtained competitive constraints for axion couplings, neutrino magnetic moment, and fermionic dark matter interactions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.07641v1-abstract-full').style.display = 'none'; document.getElementById('2408.07641v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.00664">arXiv:2408.00664</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.00664">pdf</a>, <a href="https://arxiv.org/format/2408.00664">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="High Energy Physics - Experiment">hep-ex</span> </div> </div> <p class="title is-5 mathjax"> Dark Matter Search Results from 1.54 Tonne$\cdot$Year Exposure of PandaX-4T </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=PandaX+Collaboration"> PandaX Collaboration</a>, <a href="/search/?searchtype=author&amp;query=Bo%2C+Z">Zihao Bo</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+W">Wei Chen</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+X">Xun Chen</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+Y">Yunhua Chen</a>, <a href="/search/?searchtype=author&amp;query=Cheng%2C+Z">Zhaokan Cheng</a>, <a href="/search/?searchtype=author&amp;query=Cui%2C+X">Xiangyi Cui</a>, <a href="/search/?searchtype=author&amp;query=Fan%2C+Y">Yingjie Fan</a>, <a href="/search/?searchtype=author&amp;query=Fang%2C+D">Deqing Fang</a>, <a href="/search/?searchtype=author&amp;query=Gao%2C+Z">Zhixing Gao</a>, <a href="/search/?searchtype=author&amp;query=Geng%2C+L">Lisheng Geng</a>, <a href="/search/?searchtype=author&amp;query=Giboni%2C+K">Karl Giboni</a>, <a href="/search/?searchtype=author&amp;query=Guo%2C+X">Xunan Guo</a>, <a href="/search/?searchtype=author&amp;query=Guo%2C+X">Xuyuan Guo</a>, <a href="/search/?searchtype=author&amp;query=Guo%2C+Z">Zichao Guo</a>, <a href="/search/?searchtype=author&amp;query=Han%2C+C">Chencheng Han</a>, <a href="/search/?searchtype=author&amp;query=Han%2C+K">Ke Han</a>, <a href="/search/?searchtype=author&amp;query=He%2C+C">Changda He</a>, <a href="/search/?searchtype=author&amp;query=He%2C+J">Jinrong He</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+D">Di Huang</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+H">Houqi Huang</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+J">Junting Huang</a>, <a href="/search/?searchtype=author&amp;query=Hou%2C+R">Ruquan Hou</a>, <a href="/search/?searchtype=author&amp;query=Hou%2C+Y">Yu Hou</a>, <a href="/search/?searchtype=author&amp;query=Ji%2C+X">Xiangdong Ji</a> , et al. (77 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.00664v1-abstract-short" style="display: inline;"> In this letter, we report the dark matter search results from the commissioning run and the first science run of the PandaX-4T experiment. A blind analysis is carried out on the entire data set. The data processing is improved compared to previous work, unifying the low-level signal reconstruction in a wide energy range up to 120 keV. With a total exposure of 1.54 tonne$\cdot$year, no significant&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.00664v1-abstract-full').style.display = 'inline'; document.getElementById('2408.00664v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.00664v1-abstract-full" style="display: none;"> In this letter, we report the dark matter search results from the commissioning run and the first science run of the PandaX-4T experiment. A blind analysis is carried out on the entire data set. The data processing is improved compared to previous work, unifying the low-level signal reconstruction in a wide energy range up to 120 keV. With a total exposure of 1.54 tonne$\cdot$year, no significant excess of nuclear recoil events is found. The lowest 90% confidence level exclusion on the spin-independent cross section is $1.6 \times 10^{-47} \mathrm{cm}^2$ at a dark matter mass of 40 GeV$/c^2$. Our results represent the most stringent constraint for a dark matter mass above 100 GeV$/c^2$. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.00664v1-abstract-full').style.display = 'none'; document.getElementById('2408.00664v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.00429">arXiv:2408.00429</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.00429">pdf</a>, <a href="https://arxiv.org/format/2408.00429">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Augmenting Channel Simulator and Semi- Supervised Learning for Efficient Indoor Positioning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Li%2C+Y">Yupeng Li</a>, <a href="/search/?searchtype=author&amp;query=Ning%2C+X">Xinyu Ning</a>, <a href="/search/?searchtype=author&amp;query=Gao%2C+S">Shijian Gao</a>, <a href="/search/?searchtype=author&amp;query=Liu%2C+Y">Yitong Liu</a>, <a href="/search/?searchtype=author&amp;query=Sun%2C+Z">Zhi Sun</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+Q">Qixing Wang</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+J">Jiangzhou Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.00429v1-abstract-short" style="display: inline;"> This work aims to tackle the labor-intensive and resource-consuming task of indoor positioning by proposing an efficient approach. The proposed approach involves the introduction of a semi-supervised learning (SSL) with a biased teacher (SSLB) algorithm, which effectively utilizes both labeled and unlabeled channel data. To reduce measurement expenses, unlabeled data is generated using an updated&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.00429v1-abstract-full').style.display = 'inline'; document.getElementById('2408.00429v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.00429v1-abstract-full" style="display: none;"> This work aims to tackle the labor-intensive and resource-consuming task of indoor positioning by proposing an efficient approach. The proposed approach involves the introduction of a semi-supervised learning (SSL) with a biased teacher (SSLB) algorithm, which effectively utilizes both labeled and unlabeled channel data. To reduce measurement expenses, unlabeled data is generated using an updated channel simulator (UCHS), and then weighted by adaptive confidence values to simplify the tuning of hyperparameters. Simulation results demonstrate that the proposed strategy achieves superior performance while minimizing measurement overhead and training expense compared to existing benchmarks, offering a valuable and practical solution for indoor positioning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.00429v1-abstract-full').style.display = 'none'; document.getElementById('2408.00429v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ACCEPTED for presentation at 2024 IEEE Global Communications Conference</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.13519">arXiv:2407.13519</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.13519">pdf</a>, <a href="https://arxiv.org/format/2407.13519">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> GPSFormer: A Global Perception and Local Structure Fitting-based Transformer for Point Cloud Understanding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Wang%2C+C">Changshuo Wang</a>, <a href="/search/?searchtype=author&amp;query=Wu%2C+M">Meiqing Wu</a>, <a href="/search/?searchtype=author&amp;query=Lam%2C+S">Siew-Kei Lam</a>, <a href="/search/?searchtype=author&amp;query=Ning%2C+X">Xin Ning</a>, <a href="/search/?searchtype=author&amp;query=Yu%2C+S">Shangshu Yu</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+R">Ruiping Wang</a>, <a href="/search/?searchtype=author&amp;query=Li%2C+W">Weijun Li</a>, <a href="/search/?searchtype=author&amp;query=Srikanthan%2C+T">Thambipillai Srikanthan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.13519v2-abstract-short" style="display: inline;"> Despite the significant advancements in pre-training methods for point cloud understanding, directly capturing intricate shape information from irregular point clouds without reliance on external data remains a formidable challenge. To address this problem, we propose GPSFormer, an innovative Global Perception and Local Structure Fitting-based Transformer, which learns detailed shape information f&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.13519v2-abstract-full').style.display = 'inline'; document.getElementById('2407.13519v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.13519v2-abstract-full" style="display: none;"> Despite the significant advancements in pre-training methods for point cloud understanding, directly capturing intricate shape information from irregular point clouds without reliance on external data remains a formidable challenge. To address this problem, we propose GPSFormer, an innovative Global Perception and Local Structure Fitting-based Transformer, which learns detailed shape information from point clouds with remarkable precision. The core of GPSFormer is the Global Perception Module (GPM) and the Local Structure Fitting Convolution (LSFConv). Specifically, GPM utilizes Adaptive Deformable Graph Convolution (ADGConv) to identify short-range dependencies among similar features in the feature space and employs Multi-Head Attention (MHA) to learn long-range dependencies across all positions within the feature space, ultimately enabling flexible learning of contextual representations. Inspired by Taylor series, we design LSFConv, which learns both low-order fundamental and high-order refinement information from explicitly encoded local geometric structures. Integrating the GPM and LSFConv as fundamental components, we construct GPSFormer, a cutting-edge Transformer that effectively captures global and local structures of point clouds. Extensive experiments validate GPSFormer&#39;s effectiveness in three point cloud tasks: shape classification, part segmentation, and few-shot learning. The code of GPSFormer is available at \url{https://github.com/changshuowang/GPSFormer}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.13519v2-abstract-full').style.display = 'none'; document.getElementById('2407.13519v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ECCV 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.10892">arXiv:2407.10892</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.10892">pdf</a>, <a href="https://arxiv.org/format/2407.10892">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="High Energy Physics - Experiment">hep-ex</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Solar and Stellar Astrophysics">astro-ph.SR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Nuclear Experiment">nucl-ex</span> </div> </div> <p class="title is-5 mathjax"> First Indication of Solar $^8$B Neutrino Flux through Coherent Elastic Neutrino-Nucleus Scattering in PandaX-4T </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=PandaX+Collaboration"> PandaX Collaboration</a>, <a href="/search/?searchtype=author&amp;query=Bo%2C+Z">Zihao Bo</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+W">Wei Chen</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+X">Xun Chen</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+Y">Yunhua Chen</a>, <a href="/search/?searchtype=author&amp;query=Cheng%2C+Z">Zhaokan Cheng</a>, <a href="/search/?searchtype=author&amp;query=Cui%2C+X">Xiangyi Cui</a>, <a href="/search/?searchtype=author&amp;query=Fan%2C+Y">Yingjie Fan</a>, <a href="/search/?searchtype=author&amp;query=Fang%2C+D">Deqing Fang</a>, <a href="/search/?searchtype=author&amp;query=Gao%2C+Z">Zhixing Gao</a>, <a href="/search/?searchtype=author&amp;query=Geng%2C+L">Lisheng Geng</a>, <a href="/search/?searchtype=author&amp;query=Giboni%2C+K">Karl Giboni</a>, <a href="/search/?searchtype=author&amp;query=Guo%2C+X">Xunan Guo</a>, <a href="/search/?searchtype=author&amp;query=Guo%2C+X">Xuyuan Guo</a>, <a href="/search/?searchtype=author&amp;query=Guo%2C+Z">Zichao Guo</a>, <a href="/search/?searchtype=author&amp;query=Han%2C+C">Chencheng Han</a>, <a href="/search/?searchtype=author&amp;query=Han%2C+K">Ke Han</a>, <a href="/search/?searchtype=author&amp;query=He%2C+C">Changda He</a>, <a href="/search/?searchtype=author&amp;query=He%2C+J">Jinrong He</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+D">Di Huang</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+H">Houqi Huang</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+J">Junting Huang</a>, <a href="/search/?searchtype=author&amp;query=Hou%2C+R">Ruquan Hou</a>, <a href="/search/?searchtype=author&amp;query=Hou%2C+Y">Yu Hou</a>, <a href="/search/?searchtype=author&amp;query=Ji%2C+X">Xiangdong Ji</a> , et al. (77 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.10892v3-abstract-short" style="display: inline;"> The PandaX-4T liquid xenon detector at the China Jinping Underground Laboratory is used to measure the solar $^8$B neutrino flux by detecting neutrinos through coherent scattering with xenon nuclei. Data samples requiring the coincidence of scintillation and ionization signals (paired), as well as unpaired ionization-only signals (US2), are selected with energy threshold of approximately 1.1 keV (&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.10892v3-abstract-full').style.display = 'inline'; document.getElementById('2407.10892v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.10892v3-abstract-full" style="display: none;"> The PandaX-4T liquid xenon detector at the China Jinping Underground Laboratory is used to measure the solar $^8$B neutrino flux by detecting neutrinos through coherent scattering with xenon nuclei. Data samples requiring the coincidence of scintillation and ionization signals (paired), as well as unpaired ionization-only signals (US2), are selected with energy threshold of approximately 1.1 keV (0.33 keV) nuclear recoil energy. Combining the commissioning run and the first science run of PandaX-4T, a total exposure of 1.20 and 1.04 tonne$\cdot$year are collected for the paired and US2, respectively. After unblinding, 3 and 332 events are observed with an expectation of 2.8$\pm$0.5 and 251$\pm$32 background events, for the paired and US2 data, respectively. A combined analysis yields a best-fit $^8$B neutrino signal of 3.5 (75) events from the paired (US2) data sample, with $\sim$37\% uncertainty, and the background-only hypothesis is disfavored at 2.64$蟽$ significance. This gives a solar $^8$B neutrino flux of ($8.4\pm3.1$)$\times$10$^6$ cm$^{-2}$s$^{-1}$, consistent with the standard solar model prediction. It is also the first indication of solar $^8$B neutrino ``fog&#39;&#39; in a dark matter direct detection experiment. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.10892v3-abstract-full').style.display = 'none'; document.getElementById('2407.10892v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by Physical Review Letters</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.04629">arXiv:2407.04629</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.04629">pdf</a>, <a href="https://arxiv.org/format/2407.04629">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Entity Decomposition with Filtering: A Zero-Shot Clinical Named Entity Recognition Framework </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Averly%2C+R">Reza Averly</a>, <a href="/search/?searchtype=author&amp;query=Ning%2C+X">Xia Ning</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.04629v1-abstract-short" style="display: inline;"> Clinical named entity recognition (NER) aims to retrieve important entities within clinical narratives. Recent works have demonstrated that large language models (LLMs) can achieve strong performance in this task. While previous works focus on proprietary LLMs, we investigate how open NER LLMs, trained specifically for entity recognition, perform in clinical NER. In this paper, we aim to improve t&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.04629v1-abstract-full').style.display = 'inline'; document.getElementById('2407.04629v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.04629v1-abstract-full" style="display: none;"> Clinical named entity recognition (NER) aims to retrieve important entities within clinical narratives. Recent works have demonstrated that large language models (LLMs) can achieve strong performance in this task. While previous works focus on proprietary LLMs, we investigate how open NER LLMs, trained specifically for entity recognition, perform in clinical NER. In this paper, we aim to improve them through a novel framework, entity decomposition with filtering, or EDF. Our key idea is to decompose the entity recognition task into several retrievals of sub-entity types. We also introduce a filtering mechanism to remove incorrect entities. Our experimental results demonstrate the efficacy of our framework across all metrics, models, datasets, and entity types. Our analysis reveals that entity decomposition can recognize previously missed entities with substantial improvement. We further provide a comprehensive evaluation of our framework and an in-depth error analysis to pave future works. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.04629v1-abstract-full').style.display = 'none'; document.getElementById('2407.04629v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Preprint</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.00945">arXiv:2407.00945</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.00945">pdf</a>, <a href="https://arxiv.org/format/2407.00945">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Efficient Expert Pruning for Sparse Mixture-of-Experts Language Models: Enhancing Performance and Reducing Inference Costs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Liu%2C+E">Enshu Liu</a>, <a href="/search/?searchtype=author&amp;query=Zhu%2C+J">Junyi Zhu</a>, <a href="/search/?searchtype=author&amp;query=Lin%2C+Z">Zinan Lin</a>, <a href="/search/?searchtype=author&amp;query=Ning%2C+X">Xuefei Ning</a>, <a href="/search/?searchtype=author&amp;query=Blaschko%2C+M+B">Matthew B. Blaschko</a>, <a href="/search/?searchtype=author&amp;query=Yan%2C+S">Shengen Yan</a>, <a href="/search/?searchtype=author&amp;query=Dai%2C+G">Guohao Dai</a>, <a href="/search/?searchtype=author&amp;query=Yang%2C+H">Huazhong Yang</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+Y">Yu Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.00945v1-abstract-short" style="display: inline;"> The rapid advancement of large language models (LLMs) has led to architectures with billions to trillions of parameters, posing significant deployment challenges due to their substantial demands on memory, processing power, and energy consumption. Sparse Mixture-of-Experts (SMoE) architectures have emerged as a solution, activating only a subset of parameters per token, thereby achieving faster in&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.00945v1-abstract-full').style.display = 'inline'; document.getElementById('2407.00945v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.00945v1-abstract-full" style="display: none;"> The rapid advancement of large language models (LLMs) has led to architectures with billions to trillions of parameters, posing significant deployment challenges due to their substantial demands on memory, processing power, and energy consumption. Sparse Mixture-of-Experts (SMoE) architectures have emerged as a solution, activating only a subset of parameters per token, thereby achieving faster inference while maintaining performance. However, SMoE models still face limitations in broader deployment due to their large parameter counts and significant GPU memory requirements. In this work, we introduce a gradient-free evolutionary strategy named EEP (Efficient Expert P}runing) to enhance the pruning of experts in SMoE models. EEP relies solely on model inference (i.e., no gradient computation) and achieves greater sparsity while maintaining or even improving performance on downstream tasks. EEP can be used to reduce both the total number of experts (thus saving GPU memory) and the number of active experts (thus accelerating inference). For example, we demonstrate that pruning up to 75% of experts in Mixtral $8\times7$B-Instruct results in a substantial reduction in parameters with minimal performance loss. Remarkably, we observe improved performance on certain tasks, such as a significant increase in accuracy on the SQuAD dataset (from 53.4% to 75.4%), when pruning half of the experts. With these results, EEP not only lowers the barrier to deploying SMoE models,but also challenges the conventional understanding of model pruning by showing that fewer experts can lead to better task-specific performance without any fine-tuning. Code is available at https://github.com/imagination-research/EEP. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.00945v1-abstract-full').style.display = 'none'; document.getElementById('2407.00945v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.14909">arXiv:2406.14909</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.14909">pdf</a>, <a href="https://arxiv.org/format/2406.14909">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> MoA: Mixture of Sparse Attention for Automatic Large Language Model Compression </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Fu%2C+T">Tianyu Fu</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+H">Haofeng Huang</a>, <a href="/search/?searchtype=author&amp;query=Ning%2C+X">Xuefei Ning</a>, <a href="/search/?searchtype=author&amp;query=Zhang%2C+G">Genghan Zhang</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+B">Boju Chen</a>, <a href="/search/?searchtype=author&amp;query=Wu%2C+T">Tianqi Wu</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+H">Hongyi Wang</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+Z">Zixiao Huang</a>, <a href="/search/?searchtype=author&amp;query=Li%2C+S">Shiyao Li</a>, <a href="/search/?searchtype=author&amp;query=Yan%2C+S">Shengen Yan</a>, <a href="/search/?searchtype=author&amp;query=Dai%2C+G">Guohao Dai</a>, <a href="/search/?searchtype=author&amp;query=Yang%2C+H">Huazhong Yang</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+Y">Yu Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.14909v2-abstract-short" style="display: inline;"> Sparse attention can effectively mitigate the significant memory and throughput demands of Large Language Models (LLMs) in long contexts. Existing methods typically employ a uniform sparse attention mask, applying the same sparse pattern across different attention heads and input lengths. However, this uniform approach fails to capture the diverse attention patterns inherent in LLMs, ignoring thei&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.14909v2-abstract-full').style.display = 'inline'; document.getElementById('2406.14909v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.14909v2-abstract-full" style="display: none;"> Sparse attention can effectively mitigate the significant memory and throughput demands of Large Language Models (LLMs) in long contexts. Existing methods typically employ a uniform sparse attention mask, applying the same sparse pattern across different attention heads and input lengths. However, this uniform approach fails to capture the diverse attention patterns inherent in LLMs, ignoring their distinct accuracy-latency trade-offs. To address this challenge, we propose the Mixture of Attention (MoA), which automatically tailors distinct sparse attention configurations to different heads and layers. MoA constructs and navigates a search space of various attention patterns and their scaling rules relative to input sequence lengths. It profiles the model, evaluates potential configurations, and pinpoints the optimal sparse attention compression plan. MoA adapts to varying input sizes, revealing that some attention heads expand their focus to accommodate longer sequences, while other heads consistently concentrate on fixed-length local contexts. Experiments show that MoA increases the effective context length by $3.9\times$ with the same average attention span, boosting retrieval accuracy by $1.5-7.1\times$ over the uniform-attention baseline across Vicuna-{7B,13B}, and Llama3-{8B,70B} models. Moreover, MoA narrows the capability gaps between sparse and dense models, reducing the maximum relative performance drop from $9\%-36\%$ to within $5\%$ across two long-context understanding benchmarks. MoA achieves a $1.2-1.4\times$ GPU memory reduction, boosting decode throughput by $6.6-8.2\times$ and $1.7-1.9\times$ compared to FlashAttention2 and vLLM, with minimal impact on performance. Our code is available at \url{https://github.com/thu-nics/MoA}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.14909v2-abstract-full').style.display = 'none'; document.getElementById('2406.14909v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.2.7 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.14629">arXiv:2406.14629</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.14629">pdf</a>, <a href="https://arxiv.org/format/2406.14629">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Can LLMs Learn by Teaching for Better Reasoning? A Preliminary Study </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Ning%2C+X">Xuefei Ning</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+Z">Zifu Wang</a>, <a href="/search/?searchtype=author&amp;query=Li%2C+S">Shiyao Li</a>, <a href="/search/?searchtype=author&amp;query=Lin%2C+Z">Zinan Lin</a>, <a href="/search/?searchtype=author&amp;query=Yao%2C+P">Peiran Yao</a>, <a href="/search/?searchtype=author&amp;query=Fu%2C+T">Tianyu Fu</a>, <a href="/search/?searchtype=author&amp;query=Blaschko%2C+M+B">Matthew B. Blaschko</a>, <a href="/search/?searchtype=author&amp;query=Dai%2C+G">Guohao Dai</a>, <a href="/search/?searchtype=author&amp;query=Yang%2C+H">Huazhong Yang</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+Y">Yu Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.14629v2-abstract-short" style="display: inline;"> Teaching to improve student models (e.g., knowledge distillation) is an extensively studied methodology in LLMs. However, for humans, teaching improves not only students but also teachers, by fostering more rigorous and clear reasoning as well as knowledge building. We ask: Can LLMs also learn by teaching (LbT) for better reasoning? If the answer is yes, we can potentially unlock the possibility o&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.14629v2-abstract-full').style.display = 'inline'; document.getElementById('2406.14629v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.14629v2-abstract-full" style="display: none;"> Teaching to improve student models (e.g., knowledge distillation) is an extensively studied methodology in LLMs. However, for humans, teaching improves not only students but also teachers, by fostering more rigorous and clear reasoning as well as knowledge building. We ask: Can LLMs also learn by teaching (LbT) for better reasoning? If the answer is yes, we can potentially unlock the possibility of continuously advancing the models without solely relying on human-produced data or stronger models. In this paper, we provide a preliminary exploration on this question. We show that LbT ideas can be incorporated into existing LLM training/prompting pipelines and bring improvements. Specifically, we design three methods, each mimicking one of the three levels of LbT: observing students&#39; feedback, learning from the feedback, and learning iteratively, with the goals of improving answer accuracy without training or improving models&#39; inherent capability with fine-tuning. We reveal some findings: (1) Teaching materials that make it easier for students to learn have clearer and more accurate logic when using in-context learning as the student&#39;s &#34;learning&#34; method; (2) Weak-to-strong generalization: LbT might help improve strong models by teaching weak models; (3) Diversity in students might help: teaching multiple students could be better than teaching one student or the teacher itself. We hope that our exploration can inspire future research on LbT and more broadly adopting the advanced techniques in education to improve LLMs. The code and website are at https://github.com/imagination-research/lbt and https://sites.google.com/view/llm-learning-by-teaching. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.14629v2-abstract-full').style.display = 'none'; document.getElementById('2406.14629v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NeurIPS 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.08552">arXiv:2406.08552</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.08552">pdf</a>, <a href="https://arxiv.org/format/2406.08552">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> DiTFastAttn: Attention Compression for Diffusion Transformer Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Yuan%2C+Z">Zhihang Yuan</a>, <a href="/search/?searchtype=author&amp;query=Zhang%2C+H">Hanling Zhang</a>, <a href="/search/?searchtype=author&amp;query=Lu%2C+P">Pu Lu</a>, <a href="/search/?searchtype=author&amp;query=Ning%2C+X">Xuefei Ning</a>, <a href="/search/?searchtype=author&amp;query=Zhang%2C+L">Linfeng Zhang</a>, <a href="/search/?searchtype=author&amp;query=Zhao%2C+T">Tianchen Zhao</a>, <a href="/search/?searchtype=author&amp;query=Yan%2C+S">Shengen Yan</a>, <a href="/search/?searchtype=author&amp;query=Dai%2C+G">Guohao Dai</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+Y">Yu Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.08552v2-abstract-short" style="display: inline;"> Diffusion Transformers (DiT) excel at image and video generation but face computational challenges due to the quadratic complexity of self-attention operators. We propose DiTFastAttn, a post-training compression method to alleviate the computational bottleneck of DiT. We identify three key redundancies in the attention computation during DiT inference: (1) spatial redundancy, where many attention&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.08552v2-abstract-full').style.display = 'inline'; document.getElementById('2406.08552v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.08552v2-abstract-full" style="display: none;"> Diffusion Transformers (DiT) excel at image and video generation but face computational challenges due to the quadratic complexity of self-attention operators. We propose DiTFastAttn, a post-training compression method to alleviate the computational bottleneck of DiT. We identify three key redundancies in the attention computation during DiT inference: (1) spatial redundancy, where many attention heads focus on local information; (2) temporal redundancy, with high similarity between the attention outputs of neighboring steps; (3) conditional redundancy, where conditional and unconditional inferences exhibit significant similarity. We propose three techniques to reduce these redundancies: (1) Window Attention with Residual Sharing to reduce spatial redundancy; (2) Attention Sharing across Timesteps to exploit the similarity between steps; (3) Attention Sharing across CFG to skip redundant computations during conditional generation. We apply DiTFastAttn to DiT, PixArt-Sigma for image generation tasks, and OpenSora for video generation tasks. Our results show that for image generation, our method reduces up to 76% of the attention FLOPs and achieves up to 1.8x end-to-end speedup at high-resolution (2k x 2k) generation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.08552v2-abstract-full').style.display = 'none'; document.getElementById('2406.08552v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.07731">arXiv:2406.07731</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.07731">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Applied Physics">physics.app-ph</span> </div> </div> <p class="title is-5 mathjax"> Reconfigurable, Multifunctional Origami Electronic Membranes for Mechanical and Environmental Sensing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Yao%2C+Y">Yao Yao</a>, <a href="/search/?searchtype=author&amp;query=Li%2C+G">Guanghui Li</a>, <a href="/search/?searchtype=author&amp;query=Ning%2C+X">Xin Ning</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.07731v1-abstract-short" style="display: inline;"> This work introduces a concept of origami electronic membranes that leverages the design and fabrication of flexible electronics and the mechanical behavior of engineering origami to achieve unique multifunctional, shape-reconfigurable, and adaptive membranes for mechanical and environmental sensing in benign and harsh conditions. This paper presents the materials, design, and fabrication methods&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.07731v1-abstract-full').style.display = 'inline'; document.getElementById('2406.07731v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.07731v1-abstract-full" style="display: none;"> This work introduces a concept of origami electronic membranes that leverages the design and fabrication of flexible electronics and the mechanical behavior of engineering origami to achieve unique multifunctional, shape-reconfigurable, and adaptive membranes for mechanical and environmental sensing in benign and harsh conditions. This paper presents the materials, design, and fabrication methods for realizing six origami electronic membranes capable of reconfiguring planar or three-dimensional shapes based on the modified flasher, Kresling, Miura-ori, circular, letter, and Tachi-Miura origami patterns. These origami-based, thin-film flexible electronics can obtain both expansion and folding of their shapes, as well as transformation between different geometries. The origami electronic membranes can achieve mechanical and environmental sensing functions such as measuring motions, mechanical strains, temperatures, UV light, and humidity. The results reported here demonstrate the promise of combining engineering origami with flexible electronics to advance the state-of-the-art in multifunctional foldable and deployable electronics and systems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.07731v1-abstract-full').style.display = 'none'; document.getElementById('2406.07731v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.02540">arXiv:2406.02540</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.02540">pdf</a>, <a href="https://arxiv.org/format/2406.02540">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> ViDiT-Q: Efficient and Accurate Quantization of Diffusion Transformers for Image and Video Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Zhao%2C+T">Tianchen Zhao</a>, <a href="/search/?searchtype=author&amp;query=Fang%2C+T">Tongcheng Fang</a>, <a href="/search/?searchtype=author&amp;query=Liu%2C+E">Enshu Liu</a>, <a href="/search/?searchtype=author&amp;query=Wan%2C+R">Rui Wan</a>, <a href="/search/?searchtype=author&amp;query=Soedarmadji%2C+W">Widyadewi Soedarmadji</a>, <a href="/search/?searchtype=author&amp;query=Li%2C+S">Shiyao Li</a>, <a href="/search/?searchtype=author&amp;query=Lin%2C+Z">Zinan Lin</a>, <a href="/search/?searchtype=author&amp;query=Dai%2C+G">Guohao Dai</a>, <a href="/search/?searchtype=author&amp;query=Yan%2C+S">Shengen Yan</a>, <a href="/search/?searchtype=author&amp;query=Yang%2C+H">Huazhong Yang</a>, <a href="/search/?searchtype=author&amp;query=Ning%2C+X">Xuefei Ning</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+Y">Yu Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.02540v2-abstract-short" style="display: inline;"> Diffusion transformers (DiTs) have exhibited remarkable performance in visual generation tasks, such as generating realistic images or videos based on textual instructions. However, larger model sizes and multi-frame processing for video generation lead to increased computational and memory costs, posing challenges for practical deployment on edge devices. Post-Training Quantization (PTQ) is an ef&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.02540v2-abstract-full').style.display = 'inline'; document.getElementById('2406.02540v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.02540v2-abstract-full" style="display: none;"> Diffusion transformers (DiTs) have exhibited remarkable performance in visual generation tasks, such as generating realistic images or videos based on textual instructions. However, larger model sizes and multi-frame processing for video generation lead to increased computational and memory costs, posing challenges for practical deployment on edge devices. Post-Training Quantization (PTQ) is an effective method for reducing memory costs and computational complexity. When quantizing diffusion transformers, we find that applying existing diffusion quantization methods designed for U-Net faces challenges in preserving quality. After analyzing the major challenges for quantizing diffusion transformers, we design an improved quantization scheme: &#34;ViDiT-Q&#34;: Video and Image Diffusion Transformer Quantization) to address these issues. Furthermore, we identify highly sensitive layers and timesteps hinder quantization for lower bit-widths. To tackle this, we improve ViDiT-Q with a novel metric-decoupled mixed-precision quantization method (ViDiT-Q-MP). We validate the effectiveness of ViDiT-Q across a variety of text-to-image and video models. While baseline quantization methods fail at W8A8 and produce unreadable content at W4A8, ViDiT-Q achieves lossless W8A8 quantization. ViDiTQ-MP achieves W4A8 with negligible visual quality degradation, resulting in a 2.5x memory optimization and a 1.5x latency speedup. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.02540v2-abstract-full').style.display = 'none'; document.getElementById('2406.02540v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Project Page: https://a-suozhang.xyz/viditq.github.io/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.20710">arXiv:2405.20710</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.20710">pdf</a>, <a href="https://arxiv.org/format/2405.20710">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Information Maximization via Variational Autoencoders for Cross-Domain Recommendation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Ning%2C+X">Xuying Ning</a>, <a href="/search/?searchtype=author&amp;query=Xu%2C+W">Wujiang Xu</a>, <a href="/search/?searchtype=author&amp;query=Liu%2C+X">Xiaolei Liu</a>, <a href="/search/?searchtype=author&amp;query=Ha%2C+M">Mingming Ha</a>, <a href="/search/?searchtype=author&amp;query=Ma%2C+Q">Qiongxu Ma</a>, <a href="/search/?searchtype=author&amp;query=Li%2C+Y">Youru Li</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+L">Linxun Chen</a>, <a href="/search/?searchtype=author&amp;query=Zhang%2C+Y">Yongfeng Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.20710v1-abstract-short" style="display: inline;"> Cross-Domain Sequential Recommendation (CDSR) methods aim to address the data sparsity and cold-start problems present in Single-Domain Sequential Recommendation (SDSR). Existing CDSR methods typically rely on overlapping users, designing complex cross-domain modules to capture users&#39; latent interests that can propagate across different domains. However, their propagated informative information is&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.20710v1-abstract-full').style.display = 'inline'; document.getElementById('2405.20710v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.20710v1-abstract-full" style="display: none;"> Cross-Domain Sequential Recommendation (CDSR) methods aim to address the data sparsity and cold-start problems present in Single-Domain Sequential Recommendation (SDSR). Existing CDSR methods typically rely on overlapping users, designing complex cross-domain modules to capture users&#39; latent interests that can propagate across different domains. However, their propagated informative information is limited to the overlapping users and the users who have rich historical behavior records. As a result, these methods often underperform in real-world scenarios, where most users are non-overlapping (cold-start) and long-tailed. In this research, we introduce a new CDSR framework named Information Maximization Variational Autoencoder (\textbf{\texttt{IM-VAE}}). Here, we suggest using a Pseudo-Sequence Generator to enhance the user&#39;s interaction history input for downstream fine-grained CDSR models to alleviate the cold-start issues. We also propose a Generative Recommendation Framework combined with three regularizers inspired by the mutual information maximization (MIM) theory \cite{mcgill1954multivariate} to capture the semantic differences between a user&#39;s interests shared across domains and those specific to certain domains, as well as address the informational gap between a user&#39;s actual interaction sequences and the pseudo-sequences generated. To the best of our knowledge, this paper is the first CDSR work that considers the information disentanglement and denoising of pseudo-sequences in the open-world recommendation scenario. Empirical experiments illustrate that \texttt{IM-VAE} outperforms the state-of-the-art approaches on two real-world cross-domain datasets on all sorts of users, including cold-start and tailed users, demonstrating the effectiveness of \texttt{IM-VAE} in open-world recommendation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.20710v1-abstract-full').style.display = 'none'; document.getElementById('2405.20710v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.17890">arXiv:2405.17890</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.17890">pdf</a>, <a href="https://arxiv.org/format/2405.17890">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> SLMRec: Empowering Small Language Models for Sequential Recommendation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Xu%2C+W">Wujiang Xu</a>, <a href="/search/?searchtype=author&amp;query=Wu%2C+Q">Qitian Wu</a>, <a href="/search/?searchtype=author&amp;query=Liang%2C+Z">Zujie Liang</a>, <a href="/search/?searchtype=author&amp;query=Han%2C+J">Jiaojiao Han</a>, <a href="/search/?searchtype=author&amp;query=Ning%2C+X">Xuying Ning</a>, <a href="/search/?searchtype=author&amp;query=Shi%2C+Y">Yunxiao Shi</a>, <a href="/search/?searchtype=author&amp;query=Lin%2C+W">Wenfang Lin</a>, <a href="/search/?searchtype=author&amp;query=Zhang%2C+Y">Yongfeng Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.17890v2-abstract-short" style="display: inline;"> Sequential Recommendation (SR) task involves predicting the next item a user is likely to interact with, given their past interactions. The SR models examine the sequence of a user&#39;s actions to discern more complex behavioral patterns and temporal dynamics. Recent research demonstrates the great impact of LLMs on sequential recommendation systems, either viewing sequential recommendation as langua&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.17890v2-abstract-full').style.display = 'inline'; document.getElementById('2405.17890v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.17890v2-abstract-full" style="display: none;"> Sequential Recommendation (SR) task involves predicting the next item a user is likely to interact with, given their past interactions. The SR models examine the sequence of a user&#39;s actions to discern more complex behavioral patterns and temporal dynamics. Recent research demonstrates the great impact of LLMs on sequential recommendation systems, either viewing sequential recommendation as language modeling or serving as the backbone for user representation. Although these methods deliver outstanding performance, there is scant evidence of the necessity of a large language model and how large the language model is needed, especially in the sequential recommendation scene. Meanwhile, due to the huge size of LLMs, it is inefficient and impractical to apply a LLM-based model in real-world platforms that often need to process billions of traffic logs daily. In this paper, we explore the influence of LLMs&#39; depth by conducting extensive experiments on large-scale industry datasets. Surprisingly, our motivational experiments reveal that most intermediate layers of LLMs are redundant, indicating that pruning the remaining layers can still maintain strong performance. Motivated by this insight, we empower small language models for SR, namely SLMRec, which adopt a simple yet effective knowledge distillation method. Moreover, SLMRec is orthogonal to other post-training efficiency techniques, such as quantization and pruning, so that they can be leveraged in combination. Comprehensive experimental results illustrate that the proposed SLMRec model attains the best performance using only 13% of the parameters found in LLM-based recommendation models while simultaneously achieving up to 6.6x and 8.0x speedups in training and inference time costs, respectively. Besides, we provide a theoretical justification for why small language models can perform comparably to large language models in SR. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.17890v2-abstract-full').style.display = 'none'; document.getElementById('2405.17890v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.17873">arXiv:2405.17873</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.17873">pdf</a>, <a href="https://arxiv.org/format/2405.17873">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> MixDQ: Memory-Efficient Few-Step Text-to-Image Diffusion Models with Metric-Decoupled Mixed Precision Quantization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Zhao%2C+T">Tianchen Zhao</a>, <a href="/search/?searchtype=author&amp;query=Ning%2C+X">Xuefei Ning</a>, <a href="/search/?searchtype=author&amp;query=Fang%2C+T">Tongcheng Fang</a>, <a href="/search/?searchtype=author&amp;query=Liu%2C+E">Enshu Liu</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+G">Guyue Huang</a>, <a href="/search/?searchtype=author&amp;query=Lin%2C+Z">Zinan Lin</a>, <a href="/search/?searchtype=author&amp;query=Yan%2C+S">Shengen Yan</a>, <a href="/search/?searchtype=author&amp;query=Dai%2C+G">Guohao Dai</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+Y">Yu Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.17873v2-abstract-short" style="display: inline;"> Diffusion models have achieved significant visual generation quality. However, their significant computational and memory costs pose challenge for their application on resource-constrained mobile devices or even desktop GPUs. Recent few-step diffusion models reduces the inference time by reducing the denoising steps. However, their memory consumptions are still excessive. The Post Training Quantiz&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.17873v2-abstract-full').style.display = 'inline'; document.getElementById('2405.17873v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.17873v2-abstract-full" style="display: none;"> Diffusion models have achieved significant visual generation quality. However, their significant computational and memory costs pose challenge for their application on resource-constrained mobile devices or even desktop GPUs. Recent few-step diffusion models reduces the inference time by reducing the denoising steps. However, their memory consumptions are still excessive. The Post Training Quantization (PTQ) replaces high bit-width FP representation with low-bit integer values (INT4/8) , which is an effective and efficient technique to reduce the memory cost. However, when applying to few-step diffusion models, existing quantization methods face challenges in preserving both the image quality and text alignment. To address this issue, we propose an mixed-precision quantization framework - MixDQ. Firstly, We design specialized BOS-aware quantization method for highly sensitive text embedding quantization. Then, we conduct metric-decoupled sensitivity analysis to measure the sensitivity of each layer. Finally, we develop an integer-programming-based method to conduct bit-width allocation. While existing quantization methods fall short at W8A8, MixDQ could achieve W8A8 without performance loss, and W4A8 with negligible visual degradation. Compared with FP16, we achieve 3-4x reduction in model size and memory cost, and 1.45x latency speedup. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.17873v2-abstract-full').style.display = 'none'; document.getElementById('2405.17873v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Project Page: https://a-suozhang.xyz/mixdq.github.io/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.16256">arXiv:2405.16256</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.16256">pdf</a>, <a href="https://arxiv.org/format/2405.16256">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> HETHUB: A Distributed Training System with Heterogeneous Cluster for Large-Scale Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Xu%2C+S">Si Xu</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+Z">Zixiao Huang</a>, <a href="/search/?searchtype=author&amp;query=Zeng%2C+Y">Yan Zeng</a>, <a href="/search/?searchtype=author&amp;query=Yan%2C+S">Shengen Yan</a>, <a href="/search/?searchtype=author&amp;query=Ning%2C+X">Xuefei Ning</a>, <a href="/search/?searchtype=author&amp;query=Zhang%2C+Q">Quanlu Zhang</a>, <a href="/search/?searchtype=author&amp;query=Ye%2C+H">Haolin Ye</a>, <a href="/search/?searchtype=author&amp;query=Gu%2C+S">Sipei Gu</a>, <a href="/search/?searchtype=author&amp;query=Shui%2C+C">Chunsheng Shui</a>, <a href="/search/?searchtype=author&amp;query=Lin%2C+Z">Zhezheng Lin</a>, <a href="/search/?searchtype=author&amp;query=Zhang%2C+H">Hao Zhang</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+S">Sheng Wang</a>, <a href="/search/?searchtype=author&amp;query=Dai%2C+G">Guohao Dai</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+Y">Yu Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.16256v2-abstract-short" style="display: inline;"> Training large-scale models relies on a vast number of computing resources. For example, training the GPT-4 model (1.8 trillion parameters) requires 25000 A100 GPUs . It is a challenge to build a large-scale cluster with one type of GPU-accelerator. Using multiple types of GPU-accelerators to construct a large-scale cluster is an effective way to solve the problem of insufficient homogeneous GPU-a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.16256v2-abstract-full').style.display = 'inline'; document.getElementById('2405.16256v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.16256v2-abstract-full" style="display: none;"> Training large-scale models relies on a vast number of computing resources. For example, training the GPT-4 model (1.8 trillion parameters) requires 25000 A100 GPUs . It is a challenge to build a large-scale cluster with one type of GPU-accelerator. Using multiple types of GPU-accelerators to construct a large-scale cluster is an effective way to solve the problem of insufficient homogeneous GPU-accelerators. However, the existing distributed training systems for large-scale models only support homogeneous GPU-accelerators, not support heterogeneous GPU-accelerators. To address the problem, this paper proposes a distributed training system with hybrid parallelism, HETHUB, for large-scale models, which supports heterogeneous cluster, including AMD, Nvidia GPU and other types of GPU-accelerators . It introduces a distributed unified communicator to realize the communication between heterogeneous GPU-accelerators, a distributed performance predictor, and an automatic parallel planner to develop and train models efficiently with heterogeneous GPU-accelerators. Compared to the distributed training system with homogeneous GPU-accelerators, our system can support six combinations of heterogeneous GPU-accelerators. We train the Llama-140B model on a heterogeneous cluster with 768 GPU-accelerators(128 AMD and 640 GPU-accelerator A). The experiment results show that the optimal performance of our system in the heterogeneous cluster has achieved up to 97.49% of the theoretical upper bound performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.16256v2-abstract-full').style.display = 'none'; document.getElementById('2405.16256v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.14224">arXiv:2405.14224</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.14224">pdf</a>, <a href="https://arxiv.org/format/2405.14224">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> DiM: Diffusion Mamba for Efficient High-Resolution Image Synthesis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Teng%2C+Y">Yao Teng</a>, <a href="/search/?searchtype=author&amp;query=Wu%2C+Y">Yue Wu</a>, <a href="/search/?searchtype=author&amp;query=Shi%2C+H">Han Shi</a>, <a href="/search/?searchtype=author&amp;query=Ning%2C+X">Xuefei Ning</a>, <a href="/search/?searchtype=author&amp;query=Dai%2C+G">Guohao Dai</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+Y">Yu Wang</a>, <a href="/search/?searchtype=author&amp;query=Li%2C+Z">Zhenguo Li</a>, <a href="/search/?searchtype=author&amp;query=Liu%2C+X">Xihui Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.14224v2-abstract-short" style="display: inline;"> Diffusion models have achieved great success in image generation, with the backbone evolving from U-Net to Vision Transformers. However, the computational cost of Transformers is quadratic to the number of tokens, leading to significant challenges when dealing with high-resolution images. In this work, we propose Diffusion Mamba (DiM), which combines the efficiency of Mamba, a sequence model based&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.14224v2-abstract-full').style.display = 'inline'; document.getElementById('2405.14224v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.14224v2-abstract-full" style="display: none;"> Diffusion models have achieved great success in image generation, with the backbone evolving from U-Net to Vision Transformers. However, the computational cost of Transformers is quadratic to the number of tokens, leading to significant challenges when dealing with high-resolution images. In this work, we propose Diffusion Mamba (DiM), which combines the efficiency of Mamba, a sequence model based on State Space Models (SSM), with the expressive power of diffusion models for efficient high-resolution image synthesis. To address the challenge that Mamba cannot generalize to 2D signals, we make several architecture designs including multi-directional scans, learnable padding tokens at the end of each row and column, and lightweight local feature enhancement. Our DiM architecture achieves inference-time efficiency for high-resolution images. In addition, to further improve training efficiency for high-resolution image generation with DiM, we investigate &#34;weak-to-strong&#34; training strategy that pretrains DiM on low-resolution images ($256\times 256$) and then finetune it on high-resolution images ($512 \times 512$). We further explore training-free upsampling strategies to enable the model to generate higher-resolution images (e.g., $1024\times 1024$ and $1536\times 1536$) without further fine-tuning. Experiments demonstrate the effectiveness and efficiency of our DiM. The code of our work is available here: {\url{https://github.com/tyshiwo1/DiM-DiffusionMamba/}}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.14224v2-abstract-full').style.display = 'none'; document.getElementById('2405.14224v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">The code of our work is available here: {\url{https://github.com/tyshiwo1/DiM-DiffusionMamba/}}</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.09048">arXiv:2405.09048</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.09048">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Optics">physics.optics</span> </div> </div> <p class="title is-5 mathjax"> Beam Shaping Based on Axisymmetric Aspheric Mirrors </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Chen%2C+Z">Zhihao Chen</a>, <a href="/search/?searchtype=author&amp;query=Ning%2C+X">Xiaonan Ning</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+J">Jiucheng Chen</a>, <a href="/search/?searchtype=author&amp;query=Hua%2C+J">Jianfei Hua</a>, <a href="/search/?searchtype=author&amp;query=Lu%2C+W">Wei Lu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.09048v1-abstract-short" style="display: inline;"> Flat-top beam, known for its ability to generate a consistently even irradiation area, holds vast utility in many fields of scientific and industrial applications. In this paper, a reflective laser beam shaping method based on two axisymmetric aspheric mirrors (AAMs), a polarizing beam splitter (PBS) and two quarter wave plates (QWPs) is proposed to transform Gaussian beam into flat-top beam. Comp&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.09048v1-abstract-full').style.display = 'inline'; document.getElementById('2405.09048v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.09048v1-abstract-full" style="display: none;"> Flat-top beam, known for its ability to generate a consistently even irradiation area, holds vast utility in many fields of scientific and industrial applications. In this paper, a reflective laser beam shaping method based on two axisymmetric aspheric mirrors (AAMs), a polarizing beam splitter (PBS) and two quarter wave plates (QWPs) is proposed to transform Gaussian beam into flat-top beam. Compared to alternative beam shaping methods, the method using AAMs demonstrates distinct advantages on notably high energy efficiency and unique capability to generate parallel beams. Thanks to its relative simplicities of design, manufacture and tunability, AAMs-shaping further enhances its appeal in applied research scenarios. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.09048v1-abstract-full').style.display = 'none'; document.getElementById('2405.09048v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">7 pages, 9 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.01026">arXiv:2405.01026</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.01026">pdf</a>, <a href="https://arxiv.org/format/2405.01026">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Statistics Theory">math.ST</span> </div> </div> <p class="title is-5 mathjax"> Asymptotic Results for Penalized Quasi-Likelihood Estimation in Generalized Linear Mixed Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Ning%2C+X">Xu Ning</a>, <a href="/search/?searchtype=author&amp;query=Hui%2C+F">Francis Hui</a>, <a href="/search/?searchtype=author&amp;query=Welsh%2C+A">Alan Welsh</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.01026v1-abstract-short" style="display: inline;"> Generalized Linear Mixed Models (GLMMs) are widely used for analysing clustered data. One well-established method of overcoming the integral in the marginal likelihood function for GLMMs is penalized quasi-likelihood (PQL) estimation, although to date there are few asymptotic distribution results relating to PQL estimation for GLMMs in the literature. In this paper, we establish large sample resul&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.01026v1-abstract-full').style.display = 'inline'; document.getElementById('2405.01026v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.01026v1-abstract-full" style="display: none;"> Generalized Linear Mixed Models (GLMMs) are widely used for analysing clustered data. One well-established method of overcoming the integral in the marginal likelihood function for GLMMs is penalized quasi-likelihood (PQL) estimation, although to date there are few asymptotic distribution results relating to PQL estimation for GLMMs in the literature. In this paper, we establish large sample results for PQL estimators of the parameters and random effects in independent-cluster GLMMs, when both the number of clusters and the cluster sizes go to infinity. This is done under two distinct regimes: conditional on the random effects (essentially treating them as fixed effects) and unconditionally (treating the random effects as random). Under the conditional regime, we show the PQL estimators are asymptotically normal around the true fixed and random effects. Unconditionally, we prove that while the estimator of the fixed effects is asymptotically normally distributed, the correct asymptotic distribution of the so-called prediction gap of the random effects may in fact be a normal scale-mixture distribution under certain relative rates of growth. A simulation study is used to verify the finite sample performance of our theoretical results. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.01026v1-abstract-full').style.display = 'none'; document.getElementById('2405.01026v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.15760">arXiv:2404.15760</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2404.15760">pdf</a>, <a href="https://arxiv.org/format/2404.15760">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Debiasing Machine Unlearning with Counterfactual Examples </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Chen%2C+Z">Ziheng Chen</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+J">Jia Wang</a>, <a href="/search/?searchtype=author&amp;query=Zhuang%2C+J">Jun Zhuang</a>, <a href="/search/?searchtype=author&amp;query=Reddy%2C+A+G">Abbavaram Gowtham Reddy</a>, <a href="/search/?searchtype=author&amp;query=Silvestri%2C+F">Fabrizio Silvestri</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+J">Jin Huang</a>, <a href="/search/?searchtype=author&amp;query=Nag%2C+K">Kaushiki Nag</a>, <a href="/search/?searchtype=author&amp;query=Kuang%2C+K">Kun Kuang</a>, <a href="/search/?searchtype=author&amp;query=Ning%2C+X">Xin Ning</a>, <a href="/search/?searchtype=author&amp;query=Tolomei%2C+G">Gabriele Tolomei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.15760v1-abstract-short" style="display: inline;"> The right to be forgotten (RTBF) seeks to safeguard individuals from the enduring effects of their historical actions by implementing machine-learning techniques. These techniques facilitate the deletion of previously acquired knowledge without requiring extensive model retraining. However, they often overlook a critical issue: unlearning processes bias. This bias emerges from two main sources: (1&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.15760v1-abstract-full').style.display = 'inline'; document.getElementById('2404.15760v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.15760v1-abstract-full" style="display: none;"> The right to be forgotten (RTBF) seeks to safeguard individuals from the enduring effects of their historical actions by implementing machine-learning techniques. These techniques facilitate the deletion of previously acquired knowledge without requiring extensive model retraining. However, they often overlook a critical issue: unlearning processes bias. This bias emerges from two main sources: (1) data-level bias, characterized by uneven data removal, and (2) algorithm-level bias, which leads to the contamination of the remaining dataset, thereby degrading model accuracy. In this work, we analyze the causal factors behind the unlearning process and mitigate biases at both data and algorithmic levels. Typically, we introduce an intervention-based approach, where knowledge to forget is erased with a debiased dataset. Besides, we guide the forgetting procedure by leveraging counterfactual examples, as they maintain semantic data consistency without hurting performance on the remaining dataset. Experimental results demonstrate that our method outperforms existing machine unlearning baselines on evaluation metrics. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.15760v1-abstract-full').style.display = 'none'; document.getElementById('2404.15760v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.15264">arXiv:2404.15264</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2404.15264">pdf</a>, <a href="https://arxiv.org/format/2404.15264">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> TalkingGaussian: Structure-Persistent 3D Talking Head Synthesis via Gaussian Splatting </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Li%2C+J">Jiahe Li</a>, <a href="/search/?searchtype=author&amp;query=Zhang%2C+J">Jiawei Zhang</a>, <a href="/search/?searchtype=author&amp;query=Bai%2C+X">Xiao Bai</a>, <a href="/search/?searchtype=author&amp;query=Zheng%2C+J">Jin Zheng</a>, <a href="/search/?searchtype=author&amp;query=Ning%2C+X">Xin Ning</a>, <a href="/search/?searchtype=author&amp;query=Zhou%2C+J">Jun Zhou</a>, <a href="/search/?searchtype=author&amp;query=Gu%2C+L">Lin Gu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.15264v2-abstract-short" style="display: inline;"> Radiance fields have demonstrated impressive performance in synthesizing lifelike 3D talking heads. However, due to the difficulty in fitting steep appearance changes, the prevailing paradigm that presents facial motions by directly modifying point appearance may lead to distortions in dynamic regions. To tackle this challenge, we introduce TalkingGaussian, a deformation-based radiance fields fram&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.15264v2-abstract-full').style.display = 'inline'; document.getElementById('2404.15264v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.15264v2-abstract-full" style="display: none;"> Radiance fields have demonstrated impressive performance in synthesizing lifelike 3D talking heads. However, due to the difficulty in fitting steep appearance changes, the prevailing paradigm that presents facial motions by directly modifying point appearance may lead to distortions in dynamic regions. To tackle this challenge, we introduce TalkingGaussian, a deformation-based radiance fields framework for high-fidelity talking head synthesis. Leveraging the point-based Gaussian Splatting, facial motions can be represented in our method by applying smooth and continuous deformations to persistent Gaussian primitives, without requiring to learn the difficult appearance change like previous methods. Due to this simplification, precise facial motions can be synthesized while keeping a highly intact facial feature. Under such a deformation paradigm, we further identify a face-mouth motion inconsistency that would affect the learning of detailed speaking motions. To address this conflict, we decompose the model into two branches separately for the face and inside mouth areas, therefore simplifying the learning tasks to help reconstruct more accurate motion and structure of the mouth region. Extensive experiments demonstrate that our method renders high-quality lip-synchronized talking head videos, with better facial fidelity and higher efficiency compared with previous methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.15264v2-abstract-full').style.display = 'none'; document.getElementById('2404.15264v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at ECCV 2024. Project page: https://fictionarry.github.io/TalkingGaussian/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.14294">arXiv:2404.14294</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2404.14294">pdf</a>, <a href="https://arxiv.org/format/2404.14294">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> A Survey on Efficient Inference for Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Zhou%2C+Z">Zixuan Zhou</a>, <a href="/search/?searchtype=author&amp;query=Ning%2C+X">Xuefei Ning</a>, <a href="/search/?searchtype=author&amp;query=Hong%2C+K">Ke Hong</a>, <a href="/search/?searchtype=author&amp;query=Fu%2C+T">Tianyu Fu</a>, <a href="/search/?searchtype=author&amp;query=Xu%2C+J">Jiaming Xu</a>, <a href="/search/?searchtype=author&amp;query=Li%2C+S">Shiyao Li</a>, <a href="/search/?searchtype=author&amp;query=Lou%2C+Y">Yuming Lou</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+L">Luning Wang</a>, <a href="/search/?searchtype=author&amp;query=Yuan%2C+Z">Zhihang Yuan</a>, <a href="/search/?searchtype=author&amp;query=Li%2C+X">Xiuhong Li</a>, <a href="/search/?searchtype=author&amp;query=Yan%2C+S">Shengen Yan</a>, <a href="/search/?searchtype=author&amp;query=Dai%2C+G">Guohao Dai</a>, <a href="/search/?searchtype=author&amp;query=Zhang%2C+X">Xiao-Ping Zhang</a>, <a href="/search/?searchtype=author&amp;query=Dong%2C+Y">Yuhan Dong</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+Y">Yu Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.14294v3-abstract-short" style="display: inline;"> Large Language Models (LLMs) have attracted extensive attention due to their remarkable performance across various tasks. However, the substantial computational and memory requirements of LLM inference pose challenges for deployment in resource-constrained scenarios. Efforts within the field have been directed towards developing techniques aimed at enhancing the efficiency of LLM inference. This p&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.14294v3-abstract-full').style.display = 'inline'; document.getElementById('2404.14294v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.14294v3-abstract-full" style="display: none;"> Large Language Models (LLMs) have attracted extensive attention due to their remarkable performance across various tasks. However, the substantial computational and memory requirements of LLM inference pose challenges for deployment in resource-constrained scenarios. Efforts within the field have been directed towards developing techniques aimed at enhancing the efficiency of LLM inference. This paper presents a comprehensive survey of the existing literature on efficient LLM inference. We start by analyzing the primary causes of the inefficient LLM inference, i.e., the large model size, the quadratic-complexity attention operation, and the auto-regressive decoding approach. Then, we introduce a comprehensive taxonomy that organizes the current literature into data-level, model-level, and system-level optimization. Moreover, the paper includes comparative experiments on representative methods within critical sub-fields to provide quantitative insights. Last but not least, we provide some knowledge summary and discuss future research directions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.14294v3-abstract-full').style.display = 'none'; document.getElementById('2404.14294v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.02241">arXiv:2404.02241</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2404.02241">pdf</a>, <a href="https://arxiv.org/format/2404.02241">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Linear Combination of Saved Checkpoints Makes Consistency and Diffusion Models Better </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Liu%2C+E">Enshu Liu</a>, <a href="/search/?searchtype=author&amp;query=Zhu%2C+J">Junyi Zhu</a>, <a href="/search/?searchtype=author&amp;query=Lin%2C+Z">Zinan Lin</a>, <a href="/search/?searchtype=author&amp;query=Ning%2C+X">Xuefei Ning</a>, <a href="/search/?searchtype=author&amp;query=Blaschko%2C+M+B">Matthew B. Blaschko</a>, <a href="/search/?searchtype=author&amp;query=Yekhanin%2C+S">Sergey Yekhanin</a>, <a href="/search/?searchtype=author&amp;query=Yan%2C+S">Shengen Yan</a>, <a href="/search/?searchtype=author&amp;query=Dai%2C+G">Guohao Dai</a>, <a href="/search/?searchtype=author&amp;query=Yang%2C+H">Huazhong Yang</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+Y">Yu Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.02241v2-abstract-short" style="display: inline;"> Diffusion Models (DM) and Consistency Models (CM) are two types of popular generative models with good generation quality on various tasks. When training DM and CM, intermediate weight checkpoints are not fully utilized and only the last converged checkpoint is used. In this work, we find that high-quality model weights often lie in a basin which cannot be reached by SGD but can be obtained by pro&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.02241v2-abstract-full').style.display = 'inline'; document.getElementById('2404.02241v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.02241v2-abstract-full" style="display: none;"> Diffusion Models (DM) and Consistency Models (CM) are two types of popular generative models with good generation quality on various tasks. When training DM and CM, intermediate weight checkpoints are not fully utilized and only the last converged checkpoint is used. In this work, we find that high-quality model weights often lie in a basin which cannot be reached by SGD but can be obtained by proper checkpoint averaging. Based on these observations, we propose LCSC, a simple but effective and efficient method to enhance the performance of DM and CM, by combining checkpoints along the training trajectory with coefficients deduced from evolutionary search. We demonstrate the value of LCSC through two use cases: $\textbf{(a) Reducing training cost.}$ With LCSC, we only need to train DM/CM with fewer number of iterations and/or lower batch sizes to obtain comparable sample quality with the fully trained model. For example, LCSC achieves considerable training speedups for CM (23$\times$ on CIFAR-10 and 15$\times$ on ImageNet-64). $\textbf{(b) Enhancing pre-trained models.}$ Assuming full training is already done, LCSC can further improve the generation quality or speed of the final converged models. For example, LCSC achieves better performance using 1 number of function evaluation (NFE) than the base model with 2 NFE on consistency distillation, and decreases the NFE of DM from 15 to 9 while maintaining the generation quality on CIFAR-10. Our code is available at https://github.com/imagination-research/LCSC. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.02241v2-abstract-full').style.display = 'none'; document.getElementById('2404.02241v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 2 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.17491">arXiv:2403.17491</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.17491">pdf</a>, <a href="https://arxiv.org/format/2403.17491">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> DGoT: Dynamic Graph of Thoughts for Scientific Abstract Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Ning%2C+X">Xinyu Ning</a>, <a href="/search/?searchtype=author&amp;query=Zhao%2C+Y">Yutong Zhao</a>, <a href="/search/?searchtype=author&amp;query=Liu%2C+Y">Yitong Liu</a>, <a href="/search/?searchtype=author&amp;query=Yang%2C+H">Hongwen Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.17491v1-abstract-short" style="display: inline;"> The method of training language models based on domain datasets has obtained significant achievements in the task of generating scientific paper abstracts. However, such models face problems of generalization and expensive training costs. The use of large language models (LLMs) to solve the task of generating paper abstracts saves the cost of model training. However, due to the hallucination probl&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.17491v1-abstract-full').style.display = 'inline'; document.getElementById('2403.17491v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.17491v1-abstract-full" style="display: none;"> The method of training language models based on domain datasets has obtained significant achievements in the task of generating scientific paper abstracts. However, such models face problems of generalization and expensive training costs. The use of large language models (LLMs) to solve the task of generating paper abstracts saves the cost of model training. However, due to the hallucination problem of LLM, it is often necessary to improve the reliability of the results through multi-round query prompt approach such as Graph of Thoughts (GoT), which also brings additional reasoning costs. In this paper, we propose a Dynamic Graph of Thought (DGoT). It not only inherits the advantages of the existing GoT prompt approach, but also dynamically adjust the graph structure according to data characteristics while reducing model reasoning cost. Experimental results show that our method&#39;s cost-effectiveness in abstract generation tasks is only 43.7% to 56.4% of other multi-round query prompt approaches. Our code is available at https://github.com/JayceNing/DGoT. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.17491v1-abstract-full').style.display = 'none'; document.getElementById('2403.17491v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by LREC-COLING 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.16379">arXiv:2403.16379</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.16379">pdf</a>, <a href="https://arxiv.org/format/2403.16379">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> FlashEval: Towards Fast and Accurate Evaluation of Text-to-image Diffusion Generative Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Zhao%2C+L">Lin Zhao</a>, <a href="/search/?searchtype=author&amp;query=Zhao%2C+T">Tianchen Zhao</a>, <a href="/search/?searchtype=author&amp;query=Lin%2C+Z">Zinan Lin</a>, <a href="/search/?searchtype=author&amp;query=Ning%2C+X">Xuefei Ning</a>, <a href="/search/?searchtype=author&amp;query=Dai%2C+G">Guohao Dai</a>, <a href="/search/?searchtype=author&amp;query=Yang%2C+H">Huazhong Yang</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+Y">Yu Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.16379v1-abstract-short" style="display: inline;"> In recent years, there has been significant progress in the development of text-to-image generative models. Evaluating the quality of the generative models is one essential step in the development process. Unfortunately, the evaluation process could consume a significant amount of computational resources, making the required periodic evaluation of model performance (e.g., monitoring training progr&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.16379v1-abstract-full').style.display = 'inline'; document.getElementById('2403.16379v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.16379v1-abstract-full" style="display: none;"> In recent years, there has been significant progress in the development of text-to-image generative models. Evaluating the quality of the generative models is one essential step in the development process. Unfortunately, the evaluation process could consume a significant amount of computational resources, making the required periodic evaluation of model performance (e.g., monitoring training progress) impractical. Therefore, we seek to improve the evaluation efficiency by selecting the representative subset of the text-image dataset. We systematically investigate the design choices, including the selection criteria (textural features or image-based metrics) and the selection granularity (prompt-level or set-level). We find that the insights from prior work on subset selection for training data do not generalize to this problem, and we propose FlashEval, an iterative search algorithm tailored to evaluation data selection. We demonstrate the effectiveness of FlashEval on ranking diffusion models with various configurations, including architectures, quantization levels, and sampler schedules on COCO and DiffusionDB datasets. Our searched 50-item subset could achieve comparable evaluation quality to the randomly sampled 500-item subset for COCO annotations on unseen models, achieving a 10x evaluation speedup. We release the condensed subset of these commonly used datasets to help facilitate diffusion algorithm design and evaluation, and open-source FlashEval as a tool for condensing future datasets, accessible at https://github.com/thu-nics/FlashEval. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.16379v1-abstract-full').style.display = 'none'; document.getElementById('2403.16379v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">The paper is accepted by CVPR 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.08361">arXiv:2403.08361</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.08361">pdf</a>, <a href="https://arxiv.org/format/2403.08361">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="High Energy Physics - Experiment">hep-ex</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="High Energy Physics - Phenomenology">hep-ph</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1103/PhysRevLett.133.101805">10.1103/PhysRevLett.133.101805 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Search for Cosmic-ray Boosted Sub-MeV Dark-Matter-Electron Scattering in PandaX-4T </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Shang%2C+X">Xiaofeng Shang</a>, <a href="/search/?searchtype=author&amp;query=Abdukerim%2C+A">Abdusalam Abdukerim</a>, <a href="/search/?searchtype=author&amp;query=Bo%2C+Z">Zihao Bo</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+W">Wei Chen</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+X">Xun Chen</a>, <a href="/search/?searchtype=author&amp;query=Cheng%2C+C">Chen Cheng</a>, <a href="/search/?searchtype=author&amp;query=Cheng%2C+Z">Zhaokan Cheng</a>, <a href="/search/?searchtype=author&amp;query=Cui%2C+X">Xiangyi Cui</a>, <a href="/search/?searchtype=author&amp;query=Fan%2C+Y">Yingjie Fan</a>, <a href="/search/?searchtype=author&amp;query=Fang%2C+D">Deqing Fang</a>, <a href="/search/?searchtype=author&amp;query=Geng%2C+L">Lisheng Geng</a>, <a href="/search/?searchtype=author&amp;query=Giboni%2C+K">Karl Giboni</a>, <a href="/search/?searchtype=author&amp;query=Guo%2C+X">Xuyuan Guo</a>, <a href="/search/?searchtype=author&amp;query=Han%2C+C">Chencheng Han</a>, <a href="/search/?searchtype=author&amp;query=Han%2C+K">Ke Han</a>, <a href="/search/?searchtype=author&amp;query=He%2C+C">Changda He</a>, <a href="/search/?searchtype=author&amp;query=He%2C+J">Jinrong He</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+D">Di Huang</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+J">Junting Huang</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+Z">Zhou Huang</a>, <a href="/search/?searchtype=author&amp;query=Hou%2C+R">Ruquan Hou</a>, <a href="/search/?searchtype=author&amp;query=Hou%2C+Y">Yu Hou</a>, <a href="/search/?searchtype=author&amp;query=Ji%2C+X">Xiangdong Ji</a>, <a href="/search/?searchtype=author&amp;query=Ju%2C+Y">Yonglin Ju</a>, <a href="/search/?searchtype=author&amp;query=Li%2C+C">Chenxiang Li</a> , et al. (67 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.08361v2-abstract-short" style="display: inline;"> We report the first search for the elastic scatterings between cosmic-ray boosted sub-MeV dark matter and electrons in the PandaX-4T liquid xenon experiment. Sub-MeV dark matter particles can be accelerated by scattering with electrons in the cosmic rays and produce detectable electron recoil signals in the detector. Using the commissioning data from PandaX-4T of 0.63~tonne$\cdot$year exposure, we&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.08361v2-abstract-full').style.display = 'inline'; document.getElementById('2403.08361v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.08361v2-abstract-full" style="display: none;"> We report the first search for the elastic scatterings between cosmic-ray boosted sub-MeV dark matter and electrons in the PandaX-4T liquid xenon experiment. Sub-MeV dark matter particles can be accelerated by scattering with electrons in the cosmic rays and produce detectable electron recoil signals in the detector. Using the commissioning data from PandaX-4T of 0.63~tonne$\cdot$year exposure, we set new constraints on DM-electron scattering cross sections for DM masses ranging from 10~eV/$c^2$ to 3~keV/$c^2$. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.08361v2-abstract-full').style.display = 'none'; document.getElementById('2403.08361v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, 3 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Phys. Rev. Lett. 133, 101805 (2024) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.06912">arXiv:2403.06912</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.06912">pdf</a>, <a href="https://arxiv.org/format/2403.06912">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> DNGaussian: Optimizing Sparse-View 3D Gaussian Radiance Fields with Global-Local Depth Normalization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Li%2C+J">Jiahe Li</a>, <a href="/search/?searchtype=author&amp;query=Zhang%2C+J">Jiawei Zhang</a>, <a href="/search/?searchtype=author&amp;query=Bai%2C+X">Xiao Bai</a>, <a href="/search/?searchtype=author&amp;query=Zheng%2C+J">Jin Zheng</a>, <a href="/search/?searchtype=author&amp;query=Ning%2C+X">Xin Ning</a>, <a href="/search/?searchtype=author&amp;query=Zhou%2C+J">Jun Zhou</a>, <a href="/search/?searchtype=author&amp;query=Gu%2C+L">Lin Gu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.06912v3-abstract-short" style="display: inline;"> Radiance fields have demonstrated impressive performance in synthesizing novel views from sparse input views, yet prevailing methods suffer from high training costs and slow inference speed. This paper introduces DNGaussian, a depth-regularized framework based on 3D Gaussian radiance fields, offering real-time and high-quality few-shot novel view synthesis at low costs. Our motivation stems from t&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.06912v3-abstract-full').style.display = 'inline'; document.getElementById('2403.06912v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.06912v3-abstract-full" style="display: none;"> Radiance fields have demonstrated impressive performance in synthesizing novel views from sparse input views, yet prevailing methods suffer from high training costs and slow inference speed. This paper introduces DNGaussian, a depth-regularized framework based on 3D Gaussian radiance fields, offering real-time and high-quality few-shot novel view synthesis at low costs. Our motivation stems from the highly efficient representation and surprising quality of the recent 3D Gaussian Splatting, despite it will encounter a geometry degradation when input views decrease. In the Gaussian radiance fields, we find this degradation in scene geometry primarily lined to the positioning of Gaussian primitives and can be mitigated by depth constraint. Consequently, we propose a Hard and Soft Depth Regularization to restore accurate scene geometry under coarse monocular depth supervision while maintaining a fine-grained color appearance. To further refine detailed geometry reshaping, we introduce Global-Local Depth Normalization, enhancing the focus on small local depth changes. Extensive experiments on LLFF, DTU, and Blender datasets demonstrate that DNGaussian outperforms state-of-the-art methods, achieving comparable or better results with significantly reduced memory cost, a $25 \times$ reduction in training time, and over $3000 \times$ faster rendering speed. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.06912v3-abstract-full').style.display = 'none'; document.getElementById('2403.06912v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at CVPR 2024. Project page: https://fictionarry.github.io/DNGaussian/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.06220">arXiv:2403.06220</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.06220">pdf</a>, <a href="https://arxiv.org/format/2403.06220">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="High Energy Physics - Experiment">hep-ex</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Instrumentation and Detectors">physics.ins-det</span> </div> </div> <p class="title is-5 mathjax"> Detecting Neutrinos from Supernova Bursts in PandaX-4T </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Pang%2C+B">Binyu Pang</a>, <a href="/search/?searchtype=author&amp;query=Abdukerim%2C+A">Abdusalam Abdukerim</a>, <a href="/search/?searchtype=author&amp;query=Bo%2C+Z">Zihao Bo</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+W">Wei Chen</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+X">Xun Chen</a>, <a href="/search/?searchtype=author&amp;query=Cheng%2C+C">Chen Cheng</a>, <a href="/search/?searchtype=author&amp;query=Cheng%2C+Z">Zhaokan Cheng</a>, <a href="/search/?searchtype=author&amp;query=Cui%2C+X">Xiangyi Cui</a>, <a href="/search/?searchtype=author&amp;query=Fan%2C+Y">Yingjie Fan</a>, <a href="/search/?searchtype=author&amp;query=Fang%2C+D">Deqing Fang</a>, <a href="/search/?searchtype=author&amp;query=Fu%2C+C">Changbo Fu</a>, <a href="/search/?searchtype=author&amp;query=Fu%2C+M">Mengting Fu</a>, <a href="/search/?searchtype=author&amp;query=Geng%2C+L">Lisheng Geng</a>, <a href="/search/?searchtype=author&amp;query=Giboni%2C+K">Karl Giboni</a>, <a href="/search/?searchtype=author&amp;query=Gu%2C+L">Linhui Gu</a>, <a href="/search/?searchtype=author&amp;query=Guo%2C+X">Xuyuan Guo</a>, <a href="/search/?searchtype=author&amp;query=Han%2C+C">Chencheng Han</a>, <a href="/search/?searchtype=author&amp;query=Han%2C+K">Ke Han</a>, <a href="/search/?searchtype=author&amp;query=He%2C+C">Changda He</a>, <a href="/search/?searchtype=author&amp;query=He%2C+J">Jinrong He</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+D">Di Huang</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+Y">Yanlin Huang</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+J">Junting Huang</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+Z">Zhou Huang</a>, <a href="/search/?searchtype=author&amp;query=Hou%2C+R">Ruquan Hou</a> , et al. (71 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.06220v1-abstract-short" style="display: inline;"> Neutrinos from core-collapse supernovae are essential for the understanding of neutrino physics and stellar evolution. The dual-phase xenon dark matter detectors can provide a way to track explosions of galactic supernovae by detecting neutrinos through coherent elastic neutrino-nucleus scatterings. In this study, a variation of progenitor masses as well as explosion models are assumed to predict&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.06220v1-abstract-full').style.display = 'inline'; document.getElementById('2403.06220v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.06220v1-abstract-full" style="display: none;"> Neutrinos from core-collapse supernovae are essential for the understanding of neutrino physics and stellar evolution. The dual-phase xenon dark matter detectors can provide a way to track explosions of galactic supernovae by detecting neutrinos through coherent elastic neutrino-nucleus scatterings. In this study, a variation of progenitor masses as well as explosion models are assumed to predict the neutrino fluxes and spectra, which result in the number of expected neutrino events ranging from 6.6 to 13.7 at a distance of 10 kpc over a 10-second duration with negligible backgrounds at PandaX-4T. Two specialized triggering alarms for monitoring supernova burst neutrinos are built. The efficiency of detecting supernova explosions at various distances in the Milky Way is estimated. These alarms will be implemented in the real-time supernova monitoring system at PandaX-4T in the near future, providing the astronomical communities with supernova early warnings. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.06220v1-abstract-full').style.display = 'none'; document.getElementById('2403.06220v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages,6 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.04239">arXiv:2403.04239</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.04239">pdf</a>, <a href="https://arxiv.org/format/2403.04239">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Instrumentation and Detectors">physics.ins-det</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="High Energy Physics - Experiment">hep-ex</span> </div> </div> <p class="title is-5 mathjax"> Signal Response Model in PandaX-4T </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Luo%2C+Y">Yunyang Luo</a>, <a href="/search/?searchtype=author&amp;query=Bo%2C+Z">Zihao Bo</a>, <a href="/search/?searchtype=author&amp;query=Zhang%2C+S">Shibo Zhang</a>, <a href="/search/?searchtype=author&amp;query=Abdukerim%2C+A">Abdusalam Abdukerim</a>, <a href="/search/?searchtype=author&amp;query=Cheng%2C+C">Chen Cheng</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+W">Wei Chen</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+X">Xun Chen</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+Y">Yunhua Chen</a>, <a href="/search/?searchtype=author&amp;query=Cheng%2C+Z">Zhaokan Cheng</a>, <a href="/search/?searchtype=author&amp;query=Cui%2C+X">Xiangyi Cui</a>, <a href="/search/?searchtype=author&amp;query=Fan%2C+Y">Yingjie Fan</a>, <a href="/search/?searchtype=author&amp;query=Fang%2C+D">Deqing Fang</a>, <a href="/search/?searchtype=author&amp;query=Fu%2C+C">Changbo Fu</a>, <a href="/search/?searchtype=author&amp;query=Fu%2C+M">Mengting Fu</a>, <a href="/search/?searchtype=author&amp;query=Geng%2C+L">Lisheng Geng</a>, <a href="/search/?searchtype=author&amp;query=Giboni%2C+K">Karl Giboni</a>, <a href="/search/?searchtype=author&amp;query=Gu%2C+L">Linhui Gu</a>, <a href="/search/?searchtype=author&amp;query=Guo%2C+X">Xuyuan Guo</a>, <a href="/search/?searchtype=author&amp;query=Han%2C+C">Chencheng Han</a>, <a href="/search/?searchtype=author&amp;query=Han%2C+K">Ke Han</a>, <a href="/search/?searchtype=author&amp;query=He%2C+C">Changda He</a>, <a href="/search/?searchtype=author&amp;query=He%2C+J">Jinrong He</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+D">Di Huang</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+Y">Yanlin Huang</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+Z">Zhou Huang</a> , et al. (66 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.04239v3-abstract-short" style="display: inline;"> PandaX-4T experiment is a deep-underground dark matter direct search experiment that employs a dual-phase time projection chamber with a sensitive volume containing 3.7 tonne of liquid xenon. The detector of PandaX-4T is capable of simultaneously collecting the primary scintillation and ionization signals, utilizing their ratio to discriminate dark matter signals from background sources such as ga&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.04239v3-abstract-full').style.display = 'inline'; document.getElementById('2403.04239v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.04239v3-abstract-full" style="display: none;"> PandaX-4T experiment is a deep-underground dark matter direct search experiment that employs a dual-phase time projection chamber with a sensitive volume containing 3.7 tonne of liquid xenon. The detector of PandaX-4T is capable of simultaneously collecting the primary scintillation and ionization signals, utilizing their ratio to discriminate dark matter signals from background sources such as gamma rays and beta particles. The signal response model plays a crucial role in interpreting the data obtained by PandaX-4T. It describes the conversion from the deposited energy by dark matter interactions to the detectable signals within the detector. The signal response model is utilized in various PandaX-4T results. This work provides a comprehensive description of the procedures involved in constructing and parameter-fitting the signal response model for the energy range of approximately 1 keV to 25 keV for electronic recoils and 6 keV to 90 keV for nuclear recoils. It also covers the signal reconstruction, selection, and correction methods, which are crucial components integrated into the signal response model. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.04239v3-abstract-full').style.display = 'none'; document.getElementById('2403.04239v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.18158">arXiv:2402.18158</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2402.18158">pdf</a>, <a href="https://arxiv.org/format/2402.18158">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Evaluating Quantized Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Li%2C+S">Shiyao Li</a>, <a href="/search/?searchtype=author&amp;query=Ning%2C+X">Xuefei Ning</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+L">Luning Wang</a>, <a href="/search/?searchtype=author&amp;query=Liu%2C+T">Tengxuan Liu</a>, <a href="/search/?searchtype=author&amp;query=Shi%2C+X">Xiangsheng Shi</a>, <a href="/search/?searchtype=author&amp;query=Yan%2C+S">Shengen Yan</a>, <a href="/search/?searchtype=author&amp;query=Dai%2C+G">Guohao Dai</a>, <a href="/search/?searchtype=author&amp;query=Yang%2C+H">Huazhong Yang</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+Y">Yu Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.18158v2-abstract-short" style="display: inline;"> Post-training quantization (PTQ) has emerged as a promising technique to reduce the cost of large language models (LLMs). Specifically, PTQ can effectively mitigate memory consumption and reduce computational overhead in LLMs. To meet the requirements of both high efficiency and performance across diverse scenarios, a comprehensive evaluation of quantized LLMs is essential to guide the selection o&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.18158v2-abstract-full').style.display = 'inline'; document.getElementById('2402.18158v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.18158v2-abstract-full" style="display: none;"> Post-training quantization (PTQ) has emerged as a promising technique to reduce the cost of large language models (LLMs). Specifically, PTQ can effectively mitigate memory consumption and reduce computational overhead in LLMs. To meet the requirements of both high efficiency and performance across diverse scenarios, a comprehensive evaluation of quantized LLMs is essential to guide the selection of quantization methods. This paper presents a thorough evaluation of these factors by evaluating the effect of PTQ on Weight, Activation, and KV Cache on 11 model families, including OPT, LLaMA2, Falcon, Bloomz, Mistral, ChatGLM, Vicuna, LongChat, StableLM, Gemma, and Mamba, with parameters ranging from 125M to 180B. The evaluation encompasses five types of tasks: basic NLP, emergent ability, trustworthiness, dialogue, and long-context tasks. Moreover, we also evaluate the state-of-the-art (SOTA) quantization methods to demonstrate their applicability. Based on the extensive experiments, we systematically summarize the effect of quantization, provide recommendations to apply quantization techniques, and point out future directions. The code can be found in https://github.com/thu-nics/qllm-eval. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.18158v2-abstract-full').style.display = 'none'; document.getElementById('2402.18158v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.09391">arXiv:2402.09391</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2402.09391">pdf</a>, <a href="https://arxiv.org/format/2402.09391">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> LlaSMol: Advancing Large Language Models for Chemistry with a Large-Scale, Comprehensive, High-Quality Instruction Tuning Dataset </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Yu%2C+B">Botao Yu</a>, <a href="/search/?searchtype=author&amp;query=Baker%2C+F+N">Frazier N. Baker</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+Z">Ziqi Chen</a>, <a href="/search/?searchtype=author&amp;query=Ning%2C+X">Xia Ning</a>, <a href="/search/?searchtype=author&amp;query=Sun%2C+H">Huan Sun</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.09391v4-abstract-short" style="display: inline;"> Chemistry plays a crucial role in many domains, such as drug discovery and material science. While large language models (LLMs) such as GPT-4 exhibit remarkable capabilities on natural language processing tasks, existing research indicates that their performance on chemistry tasks is discouragingly low. In this paper, however, we demonstrate that our developed LLMs can achieve very strong results&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.09391v4-abstract-full').style.display = 'inline'; document.getElementById('2402.09391v4-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.09391v4-abstract-full" style="display: none;"> Chemistry plays a crucial role in many domains, such as drug discovery and material science. While large language models (LLMs) such as GPT-4 exhibit remarkable capabilities on natural language processing tasks, existing research indicates that their performance on chemistry tasks is discouragingly low. In this paper, however, we demonstrate that our developed LLMs can achieve very strong results on a comprehensive set of chemistry tasks, outperforming the most advanced GPT-4 and Claude 3 Opus by a substantial margin. To accomplish this, we propose SMolInstruct, a large-scale, comprehensive, and high-quality dataset for instruction tuning. It contains 14 selected chemistry tasks and over three million samples, laying a solid foundation for training and evaluating LLMs for chemistry. Using SMolInstruct, we fine-tune a set of open-source LLMs, among which, we find that Mistral serves as the best base model for chemistry tasks. Our analysis further demonstrates the critical role of the proposed dataset in driving the performance improvements. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.09391v4-abstract-full').style.display = 'none'; document.getElementById('2402.09391v4-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by COLM 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.08831">arXiv:2402.08831</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2402.08831">pdf</a>, <a href="https://arxiv.org/format/2402.08831">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> eCeLLM: Generalizing Large Language Models for E-commerce from Large-scale, High-quality Instruction Data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Peng%2C+B">Bo Peng</a>, <a href="/search/?searchtype=author&amp;query=Ling%2C+X">Xinyi Ling</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+Z">Ziru Chen</a>, <a href="/search/?searchtype=author&amp;query=Sun%2C+H">Huan Sun</a>, <a href="/search/?searchtype=author&amp;query=Ning%2C+X">Xia Ning</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.08831v2-abstract-short" style="display: inline;"> With tremendous efforts on developing effective e-commerce models, conventional e-commerce models show limited success in generalist e-commerce modeling, and suffer from unsatisfactory performance on new users and new products - a typical out-of-domain generalization challenge. Meanwhile, large language models (LLMs) demonstrate outstanding performance in generalist modeling and out-of-domain gene&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.08831v2-abstract-full').style.display = 'inline'; document.getElementById('2402.08831v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.08831v2-abstract-full" style="display: none;"> With tremendous efforts on developing effective e-commerce models, conventional e-commerce models show limited success in generalist e-commerce modeling, and suffer from unsatisfactory performance on new users and new products - a typical out-of-domain generalization challenge. Meanwhile, large language models (LLMs) demonstrate outstanding performance in generalist modeling and out-of-domain generalizability in many fields. Toward fully unleashing their power for e-commerce, in this paper, we construct ECInstruct, the first open-sourced, large-scale, and high-quality benchmark instruction dataset for e-commerce. Leveraging ECInstruct, we develop eCeLLM, a series of e-commerce LLMs, by instruction-tuning general-purpose LLMs. Our comprehensive experiments and evaluation demonstrate that eCeLLM models substantially outperform baseline models, including the most advanced GPT-4, and the state-of-the-art task-specific models in in-domain evaluation. Moreover, eCeLLM exhibits excellent generalizability to out-of-domain settings, including unseen products and unseen instructions, highlighting its superiority as a generalist e-commerce model. Both the ECInstruct dataset and the eCeLLM models show great potential in empowering versatile and effective LLMs for e-commerce. ECInstruct and eCeLLM models are publicly accessible through https://ninglab.github.io/eCeLLM. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.08831v2-abstract-full').style.display = 'none'; document.getElementById('2402.08831v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICML 2024; Bo Peng and Xinyi Ling contributed equally to this paper</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.05136">arXiv:2402.05136</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2402.05136">pdf</a>, <a href="https://arxiv.org/format/2402.05136">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> LV-Eval: A Balanced Long-Context Benchmark with 5 Length Levels Up to 256K </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Yuan%2C+T">Tao Yuan</a>, <a href="/search/?searchtype=author&amp;query=Ning%2C+X">Xuefei Ning</a>, <a href="/search/?searchtype=author&amp;query=Zhou%2C+D">Dong Zhou</a>, <a href="/search/?searchtype=author&amp;query=Yang%2C+Z">Zhijie Yang</a>, <a href="/search/?searchtype=author&amp;query=Li%2C+S">Shiyao Li</a>, <a href="/search/?searchtype=author&amp;query=Zhuang%2C+M">Minghui Zhuang</a>, <a href="/search/?searchtype=author&amp;query=Tan%2C+Z">Zheyue Tan</a>, <a href="/search/?searchtype=author&amp;query=Yao%2C+Z">Zhuyu Yao</a>, <a href="/search/?searchtype=author&amp;query=Lin%2C+D">Dahua Lin</a>, <a href="/search/?searchtype=author&amp;query=Li%2C+B">Boxun Li</a>, <a href="/search/?searchtype=author&amp;query=Dai%2C+G">Guohao Dai</a>, <a href="/search/?searchtype=author&amp;query=Yan%2C+S">Shengen Yan</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+Y">Yu Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.05136v2-abstract-short" style="display: inline;"> State-of-the-art large language models (LLMs) are now claiming remarkable supported context lengths of 256k or even more. In contrast, the average context lengths of mainstream benchmarks are insufficient (5k-21k), and they suffer from potential knowledge leakage and inaccurate metrics, resulting in biased evaluation. This paper introduces LV-Eval, a challenging long-context benchmark with five le&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.05136v2-abstract-full').style.display = 'inline'; document.getElementById('2402.05136v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.05136v2-abstract-full" style="display: none;"> State-of-the-art large language models (LLMs) are now claiming remarkable supported context lengths of 256k or even more. In contrast, the average context lengths of mainstream benchmarks are insufficient (5k-21k), and they suffer from potential knowledge leakage and inaccurate metrics, resulting in biased evaluation. This paper introduces LV-Eval, a challenging long-context benchmark with five length levels (16k, 32k, 64k, 128k, and 256k) reaching up to 256k words. LV-Eval features two main tasks, single-hop QA and multi-hop QA, comprising 11 bilingual datasets. The design of LV-Eval has incorporated three key techniques, namely confusing facts insertion, keyword and phrase replacement, and keyword-recall-based metric design. The advantages of LV-Eval include controllable evaluation across different context lengths, challenging test instances with confusing facts, mitigated knowledge leakage, and more objective evaluations. We evaluate 15 LLMs on LV-Eval and conduct ablation studies on the benchmarking techniques. The results reveal that: (i) Moonshot-v1 and recent large-scale open-source models, such as Qwen-2.5-72B and Llama-3.1-70B, achieve the highest performance on LV-Eval, particularly at lengths below 64k. (ii) Models exhibit distinct score trends. For example, GLM-4-9B-128k, Yi-6B-200k, and Llama3-8B-1M exhibit a relatively gentle degradation of performance, but their absolute performances may not necessarily be higher than those of LLMs with shorter context lengths. (iii) LLMs&#39; performances can significantly degrade in the presence of confusing information, especially in the pressure test of &#34;needle in a haystack&#34;. (iv) Issues related to knowledge leakage and inaccurate metrics introduce bias in evaluation, and these concerns are alleviated in LV-Eval. All datasets and evaluation codes are released at: https://github.com/infinigence/LVEval. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.05136v2-abstract-full').style.display = 'none'; document.getElementById('2402.05136v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.03596">arXiv:2402.03596</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2402.03596">pdf</a>, <a href="https://arxiv.org/format/2402.03596">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="High Energy Physics - Experiment">hep-ex</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cosmology and Nongalactic Astrophysics">astro-ph.CO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="High Energy Physics - Phenomenology">hep-ph</span> </div> </div> <p class="title is-5 mathjax"> PandaX-xT: a Multi-ten-tonne Liquid Xenon Observatory at the China Jinping Underground Laboratory </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=PandaX+Collaboration"> PandaX Collaboration</a>, <a href="/search/?searchtype=author&amp;query=Abdukerim%2C+A">Abdusalam Abdukerim</a>, <a href="/search/?searchtype=author&amp;query=Bo%2C+Z">Zihao Bo</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+W">Wei Chen</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+X">Xun Chen</a>, <a href="/search/?searchtype=author&amp;query=Cheng%2C+C">Chen Cheng</a>, <a href="/search/?searchtype=author&amp;query=Cheng%2C+Z">Zhaokan Cheng</a>, <a href="/search/?searchtype=author&amp;query=Cui%2C+X">Xiangyi Cui</a>, <a href="/search/?searchtype=author&amp;query=Fan%2C+Y">Yingjie Fan</a>, <a href="/search/?searchtype=author&amp;query=Fang%2C+D">Deqing Fang</a>, <a href="/search/?searchtype=author&amp;query=Geng%2C+L">Lisheng Geng</a>, <a href="/search/?searchtype=author&amp;query=Giboni%2C+K">Karl Giboni</a>, <a href="/search/?searchtype=author&amp;query=Gu%2C+L">Linhui Gu</a>, <a href="/search/?searchtype=author&amp;query=Guo%2C+X">Xunan Guo</a>, <a href="/search/?searchtype=author&amp;query=Guo%2C+X">Xuyuan Guo</a>, <a href="/search/?searchtype=author&amp;query=Guo%2C+Z">Zhichao Guo</a>, <a href="/search/?searchtype=author&amp;query=Han%2C+C">Chencheng Han</a>, <a href="/search/?searchtype=author&amp;query=Han%2C+K">Ke Han</a>, <a href="/search/?searchtype=author&amp;query=He%2C+C">Changda He</a>, <a href="/search/?searchtype=author&amp;query=He%2C+J">Jinrong He</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+D">Di Huang</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+J">Junting Huang</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+Z">Zhou Huang</a>, <a href="/search/?searchtype=author&amp;query=Hou%2C+R">Ruquan Hou</a>, <a href="/search/?searchtype=author&amp;query=Hou%2C+Y">Yu Hou</a> , et al. (68 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.03596v1-abstract-short" style="display: inline;"> We propose a major upgrade to the existing PandaX-4T experiment in the China Jinping Underground Laboratory. The new experiment, PandaX-xT, will be a multi-ten-tonne liquid xenon, ultra-low background, and general-purpose observatory. The full-scaled PandaX-xT contains a 43-tonne liquid xenon active target. Such an experiment will significantly advance our fundamental understanding of particle phy&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.03596v1-abstract-full').style.display = 'inline'; document.getElementById('2402.03596v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.03596v1-abstract-full" style="display: none;"> We propose a major upgrade to the existing PandaX-4T experiment in the China Jinping Underground Laboratory. The new experiment, PandaX-xT, will be a multi-ten-tonne liquid xenon, ultra-low background, and general-purpose observatory. The full-scaled PandaX-xT contains a 43-tonne liquid xenon active target. Such an experiment will significantly advance our fundamental understanding of particle physics and astrophysics. The sensitivity of dark matter direct detection will be improved by nearly two orders of magnitude compared to the current best limits, approaching the so-called &#34;neutrino floor&#34; for a dark matter mass above 10 GeV/$c^2$, providing a decisive test to the Weakly Interacting Massive Particle paradigm. By searching for the neutrinoless double beta decay of $^{136}$Xe isotope in the detector, the effective Majorana neutrino mass can be measured to a [10 -- 41] meV/$c^2$ sensitivity, providing a key test to the Dirac/Majorana nature of neutrino s. Astrophysical neutrinos and other ultra-rare interactions can also be measured and searched for with an unprecedented background level, opening up new windows of discovery. Depending on the findings, PandaX-xT will seek the next stage upgrade utilizing isotopic separation on natural xenon. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.03596v1-abstract-full').style.display = 'none'; document.getElementById('2402.03596v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Ning%2C+X&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Ning%2C+X&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Ning%2C+X&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Ning%2C+X&amp;start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Ning%2C+X&amp;start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

Pages: 1 2 3 4 5 6 7 8 9 10