<!-- CINXE.COM -->
<!-- Search | arXiv e-print repository -->
<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script 
src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. 
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" 
role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 3,394 results for author: <span class="mathjax">Zhou, H</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <form method="GET" action="/search/" aria-role="search"> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Zhou, H"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input 
checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Zhou%2C+H&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Zhou, H"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option 
selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Zhou%2C+H&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Zhou%2C+H&start=0" class="pagination-link is-current" aria-label="Page 1" aria-current="page">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Zhou%2C+H&start=50" class="pagination-link " aria-label="Page 2">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Zhou%2C+H&start=100" class="pagination-link " aria-label="Page 3">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Zhou%2C+H&start=150" class="pagination-link " aria-label="Page 4">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Zhou%2C+H&start=200" class="pagination-link " aria-label="Page 5">5 </a> </li> <li><span class="pagination-ellipsis">…</span></li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a 
href="https://arxiv.org/abs/2502.14795">arXiv:2502.14795</a> <span> [<a href="https://arxiv.org/pdf/2502.14795">pdf</a>, <a href="https://arxiv.org/format/2502.14795">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Humanoid-VLA: Towards Universal Humanoid Control with Visual Integration </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Ding%2C+P">Pengxiang Ding</a>, <a href="/search/?searchtype=author&query=Ma%2C+J">Jianfei Ma</a>, <a href="/search/?searchtype=author&query=Tong%2C+X">Xinyang Tong</a>, <a href="/search/?searchtype=author&query=Zou%2C+B">Binghong Zou</a>, <a href="/search/?searchtype=author&query=Luo%2C+X">Xinxin Luo</a>, <a href="/search/?searchtype=author&query=Fan%2C+Y">Yiguo Fan</a>, <a href="/search/?searchtype=author&query=Wang%2C+T">Ting Wang</a>, <a href="/search/?searchtype=author&query=Lu%2C+H">Hongchao Lu</a>, <a href="/search/?searchtype=author&query=Mo%2C+P">Panzhong Mo</a>, <a href="/search/?searchtype=author&query=Liu%2C+J">Jinxin Liu</a>, <a href="/search/?searchtype=author&query=Wang%2C+Y">Yuefan Wang</a>, <a href="/search/?searchtype=author&query=Zhou%2C+H">Huaicheng Zhou</a>, <a href="/search/?searchtype=author&query=Feng%2C+W">Wenshuo Feng</a>, <a href="/search/?searchtype=author&query=Liu%2C+J">Jiacheng Liu</a>, <a href="/search/?searchtype=author&query=Huang%2C+S">Siteng Huang</a>, <a href="/search/?searchtype=author&query=Wang%2C+D">Donglin Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.14795v1-abstract-short" style="display: inline;"> This paper addresses the 
limitations of current humanoid robot control frameworks, which primarily rely on reactive mechanisms and lack autonomous interaction capabilities due to data scarcity. We propose Humanoid-VLA, a novel framework that integrates language understanding, egocentric scene perception, and motion control, enabling universal humanoid control. Humanoid-VLA begins with language-mot… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.14795v1-abstract-full').style.display = 'inline'; document.getElementById('2502.14795v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.14795v1-abstract-full" style="display: none;"> This paper addresses the limitations of current humanoid robot control frameworks, which primarily rely on reactive mechanisms and lack autonomous interaction capabilities due to data scarcity. We propose Humanoid-VLA, a novel framework that integrates language understanding, egocentric scene perception, and motion control, enabling universal humanoid control. Humanoid-VLA begins with language-motion pre-alignment using non-egocentric human motion datasets paired with textual descriptions, allowing the model to learn universal motion patterns and action semantics. We then incorporate egocentric visual context through a parameter efficient video-conditioned fine-tuning, enabling context-aware motion generation. Furthermore, we introduce a self-supervised data augmentation strategy that automatically generates pseudoannotations directly derived from motion data. This process converts raw motion sequences into informative question-answer pairs, facilitating the effective use of large-scale unlabeled video data. 
Built upon whole-body control architectures, extensive experiments show that Humanoid-VLA achieves object interaction and environment exploration tasks with enhanced contextual awareness, demonstrating a more human-like capacity for adaptive and intelligent engagement. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.14795v1-abstract-full').style.display = 'none'; document.getElementById('2502.14795v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.14664">arXiv:2502.14664</a> <span> [<a href="https://arxiv.org/pdf/2502.14664">pdf</a>, <a href="https://arxiv.org/format/2502.14664">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="High Energy Physics - Phenomenology">hep-ph</span> </div> </div> <p class="title is-5 mathjax"> Status of $\mathbb{Z}_3$-NMSSM featuring a light bino-dominated LSP and a light singlet-like scalar under the LZ Experiment </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Zhou%2C+H">Haijing Zhou</a>, <a href="/search/?searchtype=author&query=Ban%2C+G">Guangning Ban</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.14664v1-abstract-short" style="display: inline;"> In the presence of a light singlet-like scalar state, the Bino-dominated dark matter (DM) candidate in the $\mathbb{Z}_3$-NMSSM exhibits significant differences from its counterpart in the MSSM, both in terms of 
intrinsic properties and mechanisms governing DM relic density and detection. Motivated by recent advancements in particle physics experiments, we systematically analyzed the implications… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.14664v1-abstract-full').style.display = 'inline'; document.getElementById('2502.14664v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.14664v1-abstract-full" style="display: none;"> In the presence of a light singlet-like scalar state, the Bino-dominated dark matter (DM) candidate in the $\mathbb{Z}_3$-NMSSM exhibits significant differences from its counterpart in the MSSM, both in terms of intrinsic properties and mechanisms governing DM relic density and detection. Motivated by recent advancements in particle physics experiments, we systematically analyzed the implications of these developments for the $\mathbb{Z}_3$-NMSSM framework featuring a light bino-dominated DM candidate and a light singlet-like scalar, ensuring theoretical consistency with empirical observations. Of paramount relevance are the latest direct detection constraints from the LZ experiment, SUSY searches at the LHC, and precision measurements of the Muon g-2 anomaly at Fermilab, which collectively impose complementary constraints on the model's parameter space. Our investigation utilized the MultiNest algorithm to perform a rigorous parameter space scan, informed by the LZ(2022) experimental limits, while incorporating constraints from LHC Higgs analyses, Muon g-2 data, and B-physics observables. The results demonstrate that current experimental bounds, particularly stringent limits on SI and SD DM-nucleus scattering cross-sections, coupled with LHC exclusion limits on electroweakinos, strongly disfavor this scenario. 
Nonetheless, the model retains the capacity to naturally reproduce the Z boson mass and Standard Model-like Higgs boson mass, explain the Muon Anomalous Magnetic Moment, and generate substantial corrections to the W boson mass. These findings exclusively manifest within the NMSSM framework, arising from the interplay between bino-dominated DM and singlino components, mediated through indispensable higgsino participation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.14664v1-abstract-full').style.display = 'none'; document.getElementById('2502.14664v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">35 pages, 11 figures. 
arXiv admin note: text overlap with arXiv:2012.04026, arXiv:1712.09873, arXiv:2203.08206, arXiv:2312.01594, arXiv:2303.02360, arXiv:0910.1785 by other authors</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.14605">arXiv:2502.14605</a> <span> [<a href="https://arxiv.org/pdf/2502.14605">pdf</a>, <a href="https://arxiv.org/ps/2502.14605">ps</a>, <a href="https://arxiv.org/format/2502.14605">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Strongly Correlated Electrons">cond-mat.str-el</span> </div> </div> <p class="title is-5 mathjax"> Emergent Goldstone flat bands and spontaneous symmetry breaking with type-B Goldstone modes </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Zhou%2C+H">Huan-Qiang Zhou</a>, <a href="/search/?searchtype=author&query=Osborne%2C+J+J">Jesse J. Osborne</a>, <a href="/search/?searchtype=author&query=Shi%2C+Q">Qian-Qian Shi</a>, <a href="/search/?searchtype=author&query=McCulloch%2C+I+P">Ian P. McCulloch</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.14605v1-abstract-short" style="display: inline;"> For a quantum many-body spin system undergoing spontaneous symmetry breaking with type-B Goldstone modes, a high degree of degeneracy arises in the ground state manifold. Generically, if this degeneracy is polynomial in system size, then it does not depend on the type of boundary conditions used. 
However, if there exists an emergent (local) symmetry operation tailored to a specific degenerate grou… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.14605v1-abstract-full').style.display = 'inline'; document.getElementById('2502.14605v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.14605v1-abstract-full" style="display: none;"> For a quantum many-body spin system undergoing spontaneous symmetry breaking with type-B Goldstone modes, a high degree of degeneracy arises in the ground state manifold. Generically, if this degeneracy is polynomial in system size, then it does not depend on the type of boundary conditions used. However, if there exists an emergent (local) symmetry operation tailored to a specific degenerate ground state, then we show that the degeneracies are exponential in system size and are different under periodic boundary conditions (PBCs) and open boundary conditions (OBCs). We further show that the exponential ground state degeneracies in turn imply the emergence of Goldstone flat bands -- single-mode excitations generated by a multi-site operator and its images under the repeated action of the translation operation under PBCs or the cyclic permutation symmetry operation under OBCs. Conversely, we also show that the presence of emergent Goldstone flat bands implies that there exists an emergent (local) symmetry operation tailored to a specific degenerate ground state. In addition, we propose an extrinsic characterization of emergent Goldstone flat bands, revealing a connection to quantum many-body scars, which violate the eigenstate thermalization hypothesis. We illustrate this by presenting examples from the staggered ${\rm SU}(4)$ spin-1 ferromagnetic biquadratic model and the staggered ${\rm SU}(4)$ ferromagnetic spin-orbital model. 
We also perform extensive numerical simulations for the more general ${\rm SO}(3)$ spin-1 bilinear-biquadratic and ${\rm SO(4)}$ ferromagnetic spin-orbital models, containing the two aforementioned models as the endpoints in the ferromagnetic regimes respectively, and confirm the emergence of Goldstone flat bands, as we approach these endpoints from deep inside the ferromagnetic regimes. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.14605v1-abstract-full').style.display = 'none'; document.getElementById('2502.14605v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">30 pages, 7 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.14227">arXiv:2502.14227</a> <span> [<a href="https://arxiv.org/pdf/2502.14227">pdf</a>, <a href="https://arxiv.org/format/2502.14227">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> SleepGMUformer: A gated multimodal temporal neural network for sleep staging </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Zhao%2C+C">Chenjun Zhao</a>, <a href="/search/?searchtype=author&query=Niu%2C+X">Xuesen Niu</a>, <a href="/search/?searchtype=author&query=Yu%2C+X">Xinglin Yu</a>, <a 
href="/search/?searchtype=author&query=Chen%2C+L">Long Chen</a>, <a href="/search/?searchtype=author&query=Lv%2C+N">Na Lv</a>, <a href="/search/?searchtype=author&query=Zhou%2C+H">Huiyu Zhou</a>, <a href="/search/?searchtype=author&query=Zhao%2C+A">Aite Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.14227v1-abstract-short" style="display: inline;"> Sleep staging is a key method for assessing sleep quality and diagnosing sleep disorders. However, current deep learning methods face challenges: 1) postfusion techniques ignore the varying contributions of different modalities; 2) unprocessed sleep data can interfere with frequency-domain information. To tackle these issues, this paper proposes a gated multimodal temporal neural network for multi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.14227v1-abstract-full').style.display = 'inline'; document.getElementById('2502.14227v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.14227v1-abstract-full" style="display: none;"> Sleep staging is a key method for assessing sleep quality and diagnosing sleep disorders. However, current deep learning methods face challenges: 1) postfusion techniques ignore the varying contributions of different modalities; 2) unprocessed sleep data can interfere with frequency-domain information. To tackle these issues, this paper proposes a gated multimodal temporal neural network for multidomain sleep data, including heart rate, motion, steps, EEG (Fpz-Cz, Pz-Oz), and EOG from WristHR-Motion-Sleep and SleepEDF-78. 
The model integrates: 1) a pre-processing module for feature alignment, missing value handling, and EEG de-trending; 2) a feature extraction module for complex sleep features in the time dimension; and 3) a dynamic fusion module for real-time modality weighting.Experiments show classification accuracies of 85.03% on SleepEDF-78 and 94.54% on WristHR-Motion-Sleep datasets. The model handles heterogeneous datasets and outperforms state-of-the-art models by 1.00%-4.00%. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.14227v1-abstract-full').style.display = 'none'; document.getElementById('2502.14227v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.13921">arXiv:2502.13921</a> <span> [<a href="https://arxiv.org/pdf/2502.13921">pdf</a>, <a href="https://arxiv.org/format/2502.13921">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> Exploring Code Language Models for Automated HLS-based Hardware Generation: Benchmark, Infrastructure and Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Gai%2C+J">Jiahao Gai</a>, <a href="/search/?searchtype=author&query=Hao"> Hao</a>, <a href="/search/?searchtype=author&query=Chen"> Chen</a>, <a 
href="/search/?searchtype=author&query=Wang%2C+Z">Zhican Wang</a>, <a href="/search/?searchtype=author&query=Zhou%2C+H">Hongyu Zhou</a>, <a href="/search/?searchtype=author&query=Zhao%2C+W">Wanru Zhao</a>, <a href="/search/?searchtype=author&query=Lane%2C+N">Nicholas Lane</a>, <a href="/search/?searchtype=author&query=Fan%2C+H">Hongxiang Fan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.13921v1-abstract-short" style="display: inline;"> Recent advances in code generation have illuminated the potential of employing large language models (LLMs) for general-purpose programming languages such as Python and C++, opening new opportunities for automating software development and enhancing programmer productivity. The potential of LLMs in software programming has sparked significant interest in exploring automated hardware generation and… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13921v1-abstract-full').style.display = 'inline'; document.getElementById('2502.13921v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.13921v1-abstract-full" style="display: none;"> Recent advances in code generation have illuminated the potential of employing large language models (LLMs) for general-purpose programming languages such as Python and C++, opening new opportunities for automating software development and enhancing programmer productivity. The potential of LLMs in software programming has sparked significant interest in exploring automated hardware generation and automation. Although preliminary endeavors have been made to adopt LLMs in generating hardware description languages (HDLs), several challenges persist in this direction. 
First, the volume of available HDL training data is substantially smaller compared to that for software programming languages. Second, the pre-trained LLMs, mainly tailored for software code, tend to produce HDL designs that are more error-prone. Third, the generation of HDL requires a significantly higher number of tokens compared to software programming, leading to inefficiencies in cost and energy consumption. To tackle these challenges, this paper explores leveraging LLMs to generate High-Level Synthesis (HLS)-based hardware design. Although code generation for domain-specific programming languages is not new in the literature, we aim to provide experimental results, insights, benchmarks, and evaluation infrastructure to investigate the suitability of HLS over low-level HDLs for LLM-assisted hardware design generation. To achieve this, we first finetune pre-trained models for HLS-based hardware generation, using a collected dataset with text prompts and corresponding reference HLS designs. An LLM-assisted framework is then proposed to automate end-to-end hardware code generation, which also investigates the impact of chain-of-thought and feedback loops promoting techniques on HLS-design generation. Limited by the timeframe of this research, we plan to evaluate more advanced reasoning models in the future. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13921v1-abstract-full').style.display = 'none'; document.getElementById('2502.13921v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Paper accepted by ASP-DAC'25</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.13540">arXiv:2502.13540</a> <span> [<a href="https://arxiv.org/pdf/2502.13540">pdf</a>, <a href="https://arxiv.org/format/2502.13540">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="High Energy Physics - Experiment">hep-ex</span> </div> </div> <p class="title is-5 mathjax"> Amplitude analysis of $ψ(3686)\to γK_S^0 K_S^0 $ </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=BESIII+Collaboration"> BESIII Collaboration</a>, <a href="/search/?searchtype=author&query=Ablikim%2C+M">M. Ablikim</a>, <a href="/search/?searchtype=author&query=Achasov%2C+M+N">M. N. Achasov</a>, <a href="/search/?searchtype=author&query=Adlarson%2C+P">P. Adlarson</a>, <a href="/search/?searchtype=author&query=Ai%2C+X+C">X. C. Ai</a>, <a href="/search/?searchtype=author&query=Aliberti%2C+R">R. Aliberti</a>, <a href="/search/?searchtype=author&query=Amoroso%2C+A">A. Amoroso</a>, <a href="/search/?searchtype=author&query=An%2C+Q">Q. An</a>, <a href="/search/?searchtype=author&query=Bai%2C+Y">Y. Bai</a>, <a href="/search/?searchtype=author&query=Bakina%2C+O">O. Bakina</a>, <a href="/search/?searchtype=author&query=Ban%2C+Y">Y. Ban</a>, <a href="/search/?searchtype=author&query=Bao%2C+H+-">H. -R. Bao</a>, <a href="/search/?searchtype=author&query=Batozskaya%2C+V">V. Batozskaya</a>, <a href="/search/?searchtype=author&query=Begzsuren%2C+K">K. Begzsuren</a>, <a href="/search/?searchtype=author&query=Berger%2C+N">N. Berger</a>, <a href="/search/?searchtype=author&query=Berlowski%2C+M">M. 
Berlowski</a>, <a href="/search/?searchtype=author&query=Bertani%2C+M">M. Bertani</a>, <a href="/search/?searchtype=author&query=Bettoni%2C+D">D. Bettoni</a>, <a href="/search/?searchtype=author&query=Bianchi%2C+F">F. Bianchi</a>, <a href="/search/?searchtype=author&query=Bianco%2C+E">E. Bianco</a>, <a href="/search/?searchtype=author&query=Bortone%2C+A">A. Bortone</a>, <a href="/search/?searchtype=author&query=Boyko%2C+I">I. Boyko</a>, <a href="/search/?searchtype=author&query=Briere%2C+R+A">R. A. Briere</a>, <a href="/search/?searchtype=author&query=Brueggemann%2C+A">A. Brueggemann</a>, <a href="/search/?searchtype=author&query=Cai%2C+H">H. Cai</a> , et al. (704 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.13540v1-abstract-short" style="display: inline;"> Using $(2712\pm14)\times10^6$ $ψ(3686)$ events collected with the BESIII detector, we perform the first amplitude analysis of the radiative decay $ψ(3686)\to γK_S^0 K_S^0$ within the mass region $M_{K_S^0 K_S^0 }<2.8$ GeV/$c^2$. Employing a one-channel K-matrix approach for the description of the dynamics of the $K^0_S K^0_S$ system, the data sample is well described with four poles for the $f_0$-… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13540v1-abstract-full').style.display = 'inline'; document.getElementById('2502.13540v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.13540v1-abstract-full" style="display: none;"> Using $(2712\pm14)\times10^6$ $ψ(3686)$ events collected with the BESIII detector, we perform the first amplitude analysis of the radiative decay $ψ(3686)\to γK_S^0 K_S^0$ within the mass region $M_{K_S^0 K_S^0 }<2.8$ GeV/$c^2$. 
Employing a one-channel K-matrix approach for the description of the dynamics of the $K^0_S K^0_S$ system, the data sample is well described with four poles for the $f_0$-wave and three poles for the $f_2$-wave. The determined pole positions are consistent with those of well-established resonance states. The observed $f_0$ and $f_{2}$ states are found to be qualitatively consistent with those produced in radiative $J/ψ$ decays, indicating the similarity between the two charmonium states in their radiative decays. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13540v1-abstract-full').style.display = 'none'; document.getElementById('2502.13540v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">20 pages, 4 figures, submitted to JHEP</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.13462">arXiv:2502.13462</a> <span> [<a href="https://arxiv.org/pdf/2502.13462">pdf</a>, <a href="https://arxiv.org/format/2502.13462">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> </div> </div> <p class="title is-5 mathjax"> Integrating Sequential Hypothesis Testing into Adversarial Games: A Sun Zi-Inspired Framework </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Zhou%2C+H">Haosheng Zhou</a>, <a href="/search/?searchtype=author&query=Ralston%2C+D">Daniel Ralston</a>, <a 
href="/search/?searchtype=author&query=Yang%2C+X">Xu Yang</a>, <a href="/search/?searchtype=author&query=Hu%2C+R">Ruimeng Hu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.13462v1-abstract-short" style="display: inline;"> This paper investigates the interplay between sequential hypothesis testing (SHT) and adversarial decision-making in partially observable games, focusing on the deceptive strategies of red and blue teams. Inspired by Sun Zi's The Art of War and its emphasis on deception, we develop a novel framework to both deceive adversaries and counter their deceptive tactics. We model this interaction as a Sta… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13462v1-abstract-full').style.display = 'inline'; document.getElementById('2502.13462v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.13462v1-abstract-full" style="display: none;"> This paper investigates the interplay between sequential hypothesis testing (SHT) and adversarial decision-making in partially observable games, focusing on the deceptive strategies of red and blue teams. Inspired by Sun Zi's The Art of War and its emphasis on deception, we develop a novel framework to both deceive adversaries and counter their deceptive tactics. We model this interaction as a Stackelberg game where the blue team, as the follower, optimizes its controls to achieve its goals while misleading the red team into forming incorrect beliefs on its intentions. The red team, as the leader, strategically constructs and instills false beliefs through the blue team's envisioned SHT to manipulate the blue team's behavior and reveal its true objectives. 
The blue team's optimization problem balances the fulfillment of its primary objectives and the level of misdirection, while the red team coaxes the blue team into behaving consistently with its actual intentions. We derive a semi-explicit solution for the blue team's control problem within a linear-quadratic framework, and illustrate how the red team leverages leaked information from the blue team to counteract deception. Numerical experiments validate the model, showcasing the effectiveness of deception-driven strategies in adversarial systems. These findings integrate ancient strategic insights with modern control and game theory, providing a foundation for further exploration in adversarial decision-making, such as cybersecurity, autonomous systems, and financial markets. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13462v1-abstract-full').style.display = 'none'; document.getElementById('2502.13462v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12671">arXiv:2502.12671</a> <span> [<a href="https://arxiv.org/pdf/2502.12671">pdf</a>, <a href="https://arxiv.org/format/2502.12671">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Baichuan-M1: Pushing the Medical Capability of Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Wang%2C+B">Bingning Wang</a>, <a href="/search/?searchtype=author&query=Zhao%2C+H">Haizhou Zhao</a>, <a href="/search/?searchtype=author&query=Zhou%2C+H">Huozhi Zhou</a>, <a href="/search/?searchtype=author&query=Song%2C+L">Liang Song</a>, <a href="/search/?searchtype=author&query=Xu%2C+M">Mingyu Xu</a>, <a href="/search/?searchtype=author&query=Cheng%2C+W">Wei Cheng</a>, <a href="/search/?searchtype=author&query=Zeng%2C+X">Xiangrong Zeng</a>, <a href="/search/?searchtype=author&query=Zhang%2C+Y">Yupeng Zhang</a>, <a href="/search/?searchtype=author&query=Huo%2C+Y">Yuqi Huo</a>, <a href="/search/?searchtype=author&query=Wang%2C+Z">Zecheng Wang</a>, <a href="/search/?searchtype=author&query=Zhao%2C+Z">Zhengyun Zhao</a>, <a href="/search/?searchtype=author&query=Pan%2C+D">Da Pan</a>, <a href="/search/?searchtype=author&query=Yang%2C+F">Fan Yang</a>, <a href="/search/?searchtype=author&query=Kou%2C+F">Fei Kou</a>, <a href="/search/?searchtype=author&query=Li%2C+F">Fei Li</a>, <a href="/search/?searchtype=author&query=Chen%2C+F">Fuzhong Chen</a>, <a href="/search/?searchtype=author&query=Dong%2C+G">Guosheng Dong</a>, <a href="/search/?searchtype=author&query=Liu%2C+H">Han Liu</a>, <a href="/search/?searchtype=author&query=Zhang%2C+H">Hongda Zhang</a>, <a href="/search/?searchtype=author&query=He%2C+J">Jin He</a>, <a 
href="/search/?searchtype=author&query=Yang%2C+J">Jinjie Yang</a>, <a href="/search/?searchtype=author&query=Wu%2C+K">Kangxi Wu</a>, <a href="/search/?searchtype=author&query=Wu%2C+K">Kegeng Wu</a>, <a href="/search/?searchtype=author&query=Su%2C+L">Lei Su</a>, <a href="/search/?searchtype=author&query=Niu%2C+L">Linlin Niu</a> , et al. (18 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.12671v1-abstract-short" style="display: inline;"> The current generation of large language models (LLMs) is typically designed for broad, general-purpose applications, while domain-specific LLMs, especially in vertical fields like medicine, remain relatively scarce. In particular, the development of highly efficient and practical LLMs for the medical domain is challenging due to the complexity of medical knowledge and the limited availability of… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12671v1-abstract-full').style.display = 'inline'; document.getElementById('2502.12671v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.12671v1-abstract-full" style="display: none;"> The current generation of large language models (LLMs) is typically designed for broad, general-purpose applications, while domain-specific LLMs, especially in vertical fields like medicine, remain relatively scarce. In particular, the development of highly efficient and practical LLMs for the medical domain is challenging due to the complexity of medical knowledge and the limited availability of high-quality data. To bridge this gap, we introduce Baichuan-M1, a series of large language models specifically optimized for medical applications. 
Unlike traditional approaches that simply continue pretraining on existing models or apply post-training to a general base model, Baichuan-M1 is trained from scratch with a dedicated focus on enhancing medical capabilities. Our model is trained on 20 trillion tokens and incorporates a range of effective training methods that strike a balance between general capabilities and medical expertise. As a result, Baichuan-M1 not only performs strongly across general domains such as mathematics and coding but also excels in specialized medical fields. We have open-sourced Baichuan-M1-14B, a mini version of our model, which can be accessed through the following links. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12671v1-abstract-full').style.display = 'none'; document.getElementById('2502.12671v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">33 pages, technical report</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12330">arXiv:2502.12330</a> <span> [<a href="https://arxiv.org/pdf/2502.12330">pdf</a>, <a href="https://arxiv.org/format/2502.12330">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> X-IL: Exploring the Design Space of Imitation Learning Policies </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Jia%2C+X">Xiaogang Jia</a>, <a href="/search/?searchtype=author&query=Donat%2C+A">Atalay Donat</a>, <a href="/search/?searchtype=author&query=Huang%2C+X">Xi Huang</a>, <a href="/search/?searchtype=author&query=Zhao%2C+X">Xuan Zhao</a>, <a href="/search/?searchtype=author&query=Blessing%2C+D">Denis Blessing</a>, <a href="/search/?searchtype=author&query=Zhou%2C+H">Hongyi Zhou</a>, <a href="/search/?searchtype=author&query=Wang%2C+H+A">Han A. 
Wang</a>, <a href="/search/?searchtype=author&query=Zhang%2C+H">Hanyi Zhang</a>, <a href="/search/?searchtype=author&query=Wang%2C+Q">Qian Wang</a>, <a href="/search/?searchtype=author&query=Lioutikov%2C+R">Rudolf Lioutikov</a>, <a href="/search/?searchtype=author&query=Neumann%2C+G">Gerhard Neumann</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.12330v2-abstract-short" style="display: inline;"> Designing modern imitation learning (IL) policies requires making numerous decisions, including the selection of feature encoding, architecture, policy representation, and more. As the field rapidly advances, the range of available options continues to grow, creating a vast and largely unexplored design space for IL policies. In this work, we present X-IL, an accessible open-source framework desig… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12330v2-abstract-full').style.display = 'inline'; document.getElementById('2502.12330v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.12330v2-abstract-full" style="display: none;"> Designing modern imitation learning (IL) policies requires making numerous decisions, including the selection of feature encoding, architecture, policy representation, and more. As the field rapidly advances, the range of available options continues to grow, creating a vast and largely unexplored design space for IL policies. In this work, we present X-IL, an accessible open-source framework designed to systematically explore this design space. The framework's modular design enables seamless swapping of policy components, such as backbones (e.g., Transformer, Mamba, xLSTM) and policy optimization techniques (e.g., Score-matching, Flow-matching). 
This flexibility facilitates comprehensive experimentation and has led to the discovery of novel policy configurations that outperform existing methods on recent robot learning benchmarks. Our experiments demonstrate not only significant performance gains but also provide valuable insights into the strengths and weaknesses of various design choices. This study serves as both a practical reference for practitioners and a foundation for guiding future research in imitation learning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12330v2-abstract-full').style.display = 'none'; document.getElementById('2502.12330v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12320">arXiv:2502.12320</a> <span> [<a href="https://arxiv.org/pdf/2502.12320">pdf</a>, <a href="https://arxiv.org/format/2502.12320">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Towards Fusing Point Cloud and Visual Representations for Imitation Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Donat%2C+A">Atalay Donat</a>, <a href="/search/?searchtype=author&query=Jia%2C+X">Xiaogang Jia</a>, <a href="/search/?searchtype=author&query=Huang%2C+X">Xi Huang</a>, <a href="/search/?searchtype=author&query=Taranovic%2C+A">Aleksandar Taranovic</a>, <a href="/search/?searchtype=author&query=Blessing%2C+D">Denis Blessing</a>, <a href="/search/?searchtype=author&query=Li%2C+G">Ge Li</a>, <a href="/search/?searchtype=author&query=Zhou%2C+H">Hongyi Zhou</a>, <a href="/search/?searchtype=author&query=Zhang%2C+H">Hanyi Zhang</a>, <a href="/search/?searchtype=author&query=Lioutikov%2C+R">Rudolf Lioutikov</a>, <a href="/search/?searchtype=author&query=Neumann%2C+G">Gerhard Neumann</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.12320v2-abstract-short" style="display: inline;"> Learning for manipulation requires using policies that have access to rich sensory information such as point clouds or RGB images. Point clouds efficiently capture geometric structures, making them essential for manipulation tasks in imitation learning. 
In contrast, RGB images provide rich texture and semantic information that can be crucial for certain tasks. Existing approaches for fusing both m… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12320v2-abstract-full').style.display = 'inline'; document.getElementById('2502.12320v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.12320v2-abstract-full" style="display: none;"> Learning for manipulation requires using policies that have access to rich sensory information such as point clouds or RGB images. Point clouds efficiently capture geometric structures, making them essential for manipulation tasks in imitation learning. In contrast, RGB images provide rich texture and semantic information that can be crucial for certain tasks. Existing approaches for fusing both modalities assign 2D image features to point clouds. However, such approaches often lose global contextual information from the original images. In this work, we propose FPV-Net, a novel imitation learning method that effectively combines the strengths of both point cloud and RGB modalities. Our method conditions the point-cloud encoder on global and local image tokens using adaptive layer norm conditioning, leveraging the beneficial properties of both modalities. Through extensive experiments on the challenging RoboCasa benchmark, we demonstrate the limitations of relying on either modality alone and show that our method achieves state-of-the-art performance across all tasks. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12320v2-abstract-full').style.display = 'none'; document.getElementById('2502.12320v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12085">arXiv:2502.12085</a> <span> [<a href="https://arxiv.org/pdf/2502.12085">pdf</a>, <a href="https://arxiv.org/format/2502.12085">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> APB: Accelerating Distributed Long-Context Inference by Passing Compressed Context Blocks across GPUs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Huang%2C+Y">Yuxiang Huang</a>, <a href="/search/?searchtype=author&query=Li%2C+M">Mingye Li</a>, <a href="/search/?searchtype=author&query=Han%2C+X">Xu Han</a>, <a href="/search/?searchtype=author&query=Xiao%2C+C">Chaojun Xiao</a>, <a href="/search/?searchtype=author&query=Zhao%2C+W">Weilin Zhao</a>, <a href="/search/?searchtype=author&query=Ao%2C+S">Sun Ao</a>, <a href="/search/?searchtype=author&query=Zhou%2C+H">Hao Zhou</a>, <a href="/search/?searchtype=author&query=Zhou%2C+J">Jie Zhou</a>, <a href="/search/?searchtype=author&query=Liu%2C+Z">Zhiyuan Liu</a>, <a href="/search/?searchtype=author&query=Sun%2C+M">Maosong Sun</a> </p> 
<p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.12085v1-abstract-short" style="display: inline;"> While long-context inference is crucial for advancing large language model (LLM) applications, its prefill speed remains a significant bottleneck. Current approaches, including sequence parallelism strategies and compute reduction through approximate attention mechanisms, still fall short of delivering optimal inference efficiency. This hinders scaling the inputs to longer sequences and processing… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12085v1-abstract-full').style.display = 'inline'; document.getElementById('2502.12085v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.12085v1-abstract-full" style="display: none;"> While long-context inference is crucial for advancing large language model (LLM) applications, its prefill speed remains a significant bottleneck. Current approaches, including sequence parallelism strategies and compute reduction through approximate attention mechanisms, still fall short of delivering optimal inference efficiency. This hinders scaling the inputs to longer sequences and processing long-context queries in a timely manner. To address this, we introduce APB, an efficient long-context inference framework that leverages multi-host approximate attention to enhance prefill speed by reducing compute and enhancing parallelism simultaneously. APB introduces a communication mechanism for essential key-value pairs within a sequence parallelism framework, enabling a faster inference speed while maintaining task performance. We implement APB by incorporating a tailored FlashAttn kernel alongside optimized distribution strategies, supporting diverse models and parallelism configurations. 
APB achieves speedups of up to 9.2x, 4.2x, and 1.6x compared with FlashAttn, RingAttn, and StarAttn, respectively, without any observable task performance degradation. We provide the implementation and experiment code of APB in https://github.com/thunlp/APB. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12085v1-abstract-full').style.display = 'none'; document.getElementById('2502.12085v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Preprint</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11946">arXiv:2502.11946</a> <span> [<a href="https://arxiv.org/pdf/2502.11946">pdf</a>, <a href="https://arxiv.org/format/2502.11946">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Step-Audio: Unified Understanding and Generation in Intelligent Speech Interaction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a 
href="/search/?searchtype=author&query=Huang%2C+A">Ailin Huang</a>, <a href="/search/?searchtype=author&query=Wu%2C+B">Boyong Wu</a>, <a href="/search/?searchtype=author&query=Wang%2C+B">Bruce Wang</a>, <a href="/search/?searchtype=author&query=Yan%2C+C">Chao Yan</a>, <a href="/search/?searchtype=author&query=Hu%2C+C">Chen Hu</a>, <a href="/search/?searchtype=author&query=Feng%2C+C">Chengli Feng</a>, <a href="/search/?searchtype=author&query=Tian%2C+F">Fei Tian</a>, <a href="/search/?searchtype=author&query=Shen%2C+F">Feiyu Shen</a>, <a href="/search/?searchtype=author&query=Li%2C+J">Jingbei Li</a>, <a href="/search/?searchtype=author&query=Chen%2C+M">Mingrui Chen</a>, <a href="/search/?searchtype=author&query=Liu%2C+P">Peng Liu</a>, <a href="/search/?searchtype=author&query=Miao%2C+R">Ruihang Miao</a>, <a href="/search/?searchtype=author&query=You%2C+W">Wang You</a>, <a href="/search/?searchtype=author&query=Chen%2C+X">Xi Chen</a>, <a href="/search/?searchtype=author&query=Yang%2C+X">Xuerui Yang</a>, <a href="/search/?searchtype=author&query=Huang%2C+Y">Yechang Huang</a>, <a href="/search/?searchtype=author&query=Zhang%2C+Y">Yuxiang Zhang</a>, <a href="/search/?searchtype=author&query=Gong%2C+Z">Zheng Gong</a>, <a href="/search/?searchtype=author&query=Zhang%2C+Z">Zixin Zhang</a>, <a href="/search/?searchtype=author&query=Zhou%2C+H">Hongyu Zhou</a>, <a href="/search/?searchtype=author&query=Sun%2C+J">Jianjian Sun</a>, <a href="/search/?searchtype=author&query=Li%2C+B">Brian Li</a>, <a href="/search/?searchtype=author&query=Feng%2C+C">Chengting Feng</a>, <a href="/search/?searchtype=author&query=Wan%2C+C">Changyi Wan</a>, <a href="/search/?searchtype=author&query=Hu%2C+H">Hanpeng Hu</a> , et al. 
(120 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.11946v2-abstract-short" style="display: inline;"> Real-time speech interaction, serving as a fundamental interface for human-machine collaboration, holds immense potential. However, current open-source models face limitations such as high costs in voice data collection, weakness in dynamic control, and limited intelligence. To address these challenges, this paper introduces Step-Audio, the first production-ready open-source solution. Key contribu… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11946v2-abstract-full').style.display = 'inline'; document.getElementById('2502.11946v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11946v2-abstract-full" style="display: none;"> Real-time speech interaction, serving as a fundamental interface for human-machine collaboration, holds immense potential. However, current open-source models face limitations such as high costs in voice data collection, weakness in dynamic control, and limited intelligence. To address these challenges, this paper introduces Step-Audio, the first production-ready open-source solution. 
Key contributions include: 1) a 130B-parameter unified speech-text multi-modal model that achieves unified understanding and generation, with the Step-Audio-Chat version open-sourced; 2) a generative speech data engine that establishes an affordable voice cloning framework and produces the open-sourced lightweight Step-Audio-TTS-3B model through distillation; 3) an instruction-driven fine control system enabling dynamic adjustments across dialects, emotions, singing, and RAP; 4) an enhanced cognitive architecture augmented with tool calling and role-playing abilities to manage complex tasks effectively. Based on our new StepEval-Audio-360 evaluation benchmark, Step-Audio achieves state-of-the-art performance in human evaluations, especially in terms of instruction following. On open-source benchmarks like LLaMA Question, shows 9.3% average performance improvement, demonstrating our commitment to advancing the development of open-source multi-modal language technologies. Our code and models are available at https://github.com/stepfun-ai/Step-Audio. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11946v2-abstract-full').style.display = 'none'; document.getElementById('2502.11946v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11937">arXiv:2502.11937</a> <span> [<a href="https://arxiv.org/pdf/2502.11937">pdf</a>, <a href="https://arxiv.org/format/2502.11937">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> FitLight: Federated Imitation Learning for Plug-and-Play Autonomous Traffic Signal Control </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Ye%2C+Y">Yutong Ye</a>, <a href="/search/?searchtype=author&query=Zhou%2C+Y">Yingbo Zhou</a>, <a href="/search/?searchtype=author&query=Liu%2C+Z">Zhusen Liu</a>, <a href="/search/?searchtype=author&query=Du%2C+X">Xiao Du</a>, <a href="/search/?searchtype=author&query=Zhou%2C+H">Hao Zhou</a>, <a href="/search/?searchtype=author&query=Lian%2C+X">Xiang Lian</a>, <a href="/search/?searchtype=author&query=Chen%2C+M">Mingsong Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.11937v1-abstract-short" style="display: inline;"> Although Reinforcement Learning (RL)-based Traffic Signal Control (TSC) methods have been extensively studied, their practical applications still raise some serious issues such as high learning cost and poor generalizability. This is because the ``trial-and-error'' training style makes RL agents extremely dependent on the specific traffic environment, which also requires a long convergence time. 
T… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11937v1-abstract-full').style.display = 'inline'; document.getElementById('2502.11937v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11937v1-abstract-full" style="display: none;"> Although Reinforcement Learning (RL)-based Traffic Signal Control (TSC) methods have been extensively studied, their practical applications still raise some serious issues such as high learning cost and poor generalizability. This is because the ``trial-and-error'' training style makes RL agents extremely dependent on the specific traffic environment, which also requires a long convergence time. To address these issues, we propose a novel Federated Imitation Learning (FIL)-based framework for multi-intersection TSC, named FitLight, which allows RL agents to plug-and-play for any traffic environment without additional pre-training cost. Unlike existing imitation learning approaches that rely on pre-training RL agents with demonstrations, FitLight allows real-time imitation learning and seamless transition to reinforcement learning. Due to our proposed knowledge-sharing mechanism and novel hybrid pressure-based agent design, RL agents can quickly find a best control policy with only a few episodes. Moreover, for resource-constrained TSC scenarios, FitLight supports model pruning and heterogeneous model aggregation, such that RL agents can work on a micro-controller with merely 16{\it KB} RAM and 32{\it KB} ROM. Extensive experiments demonstrate that, compared to state-of-the-art methods, FitLight not only provides a superior starting point but also converges to a better final solution on both real-world and synthetic datasets, even under extreme resource limitations. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11937v1-abstract-full').style.display = 'none'; document.getElementById('2502.11937v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11916">arXiv:2502.11916</a> <span> [<a href="https://arxiv.org/pdf/2502.11916">pdf</a>, <a href="https://arxiv.org/format/2502.11916">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> EssayJudge: A Multi-Granular Benchmark for Assessing Automated Essay Scoring Capabilities of Multimodal Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Su%2C+J">Jiamin Su</a>, <a href="/search/?searchtype=author&query=Yan%2C+Y">Yibo Yan</a>, <a href="/search/?searchtype=author&query=Fu%2C+F">Fangteng Fu</a>, <a href="/search/?searchtype=author&query=Zhang%2C+H">Han Zhang</a>, <a href="/search/?searchtype=author&query=Ye%2C+J">Jingheng Ye</a>, <a href="/search/?searchtype=author&query=Liu%2C+X">Xiang Liu</a>, <a href="/search/?searchtype=author&query=Huo%2C+J">Jiahao Huo</a>, <a href="/search/?searchtype=author&query=Zhou%2C+H">Huiyu Zhou</a>, <a href="/search/?searchtype=author&query=Hu%2C+X">Xuming Hu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short 
has-text-grey-dark mathjax" id="2502.11916v1-abstract-short" style="display: inline;"> Automated Essay Scoring (AES) plays a crucial role in educational assessment by providing scalable and consistent evaluations of writing tasks. However, traditional AES systems face three major challenges: (1) reliance on handcrafted features that limit generalizability, (2) difficulty in capturing fine-grained traits like coherence and argumentation, and (3) inability to handle multimodal context… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11916v1-abstract-full').style.display = 'inline'; document.getElementById('2502.11916v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11916v1-abstract-full" style="display: none;"> Automated Essay Scoring (AES) plays a crucial role in educational assessment by providing scalable and consistent evaluations of writing tasks. However, traditional AES systems face three major challenges: (1) reliance on handcrafted features that limit generalizability, (2) difficulty in capturing fine-grained traits like coherence and argumentation, and (3) inability to handle multimodal contexts. In the era of Multimodal Large Language Models (MLLMs), we propose EssayJudge, the first multimodal benchmark to evaluate AES capabilities across lexical-, sentence-, and discourse-level traits. By leveraging MLLMs' strengths in trait-specific scoring and multimodal context understanding, EssayJudge aims to offer precise, context-rich evaluations without manual feature engineering, addressing longstanding AES limitations. Our experiments with 18 representative MLLMs reveal gaps in AES performance compared to human evaluation, particularly in discourse-level traits, highlighting the need for further advancements in MLLM-based AES research. Our dataset and code will be available upon acceptance. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11916v1-abstract-full').style.display = 'none'; document.getElementById('2502.11916v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">JS and YY are co-first authors. XH is the corresponding author</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11880">arXiv:2502.11880</a> <span> [<a href="https://arxiv.org/pdf/2502.11880">pdf</a>, <a href="https://arxiv.org/format/2502.11880">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> Bitnet.cpp: Efficient Edge Inference for Ternary LLMs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Wang%2C+J">Jinheng Wang</a>, <a href="/search/?searchtype=author&query=Zhou%2C+H">Hansong Zhou</a>, <a href="/search/?searchtype=author&query=Song%2C+T">Ting Song</a>, <a href="/search/?searchtype=author&query=Cao%2C+S">Shijie Cao</a>, <a href="/search/?searchtype=author&query=Xia%2C+Y">Yan Xia</a>, <a 
href="/search/?searchtype=author&query=Cao%2C+T">Ting Cao</a>, <a href="/search/?searchtype=author&query=Wei%2C+J">Jianyu Wei</a>, <a href="/search/?searchtype=author&query=Ma%2C+S">Shuming Ma</a>, <a href="/search/?searchtype=author&query=Wang%2C+H">Hongyu Wang</a>, <a href="/search/?searchtype=author&query=Wei%2C+F">Furu Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.11880v1-abstract-short" style="display: inline;"> The advent of 1-bit large language models (LLMs), led by BitNet b1.58, has spurred interest in ternary LLMs. Despite this, research and practical applications focusing on efficient edge inference for ternary LLMs remain scarce. To bridge this gap, we introduce Bitnet.cpp, an inference system optimized for BitNet b1.58 and ternary LLMs. Given that mixed-precision matrix multiplication (mpGEMM) cons… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11880v1-abstract-full').style.display = 'inline'; document.getElementById('2502.11880v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11880v1-abstract-full" style="display: none;"> The advent of 1-bit large language models (LLMs), led by BitNet b1.58, has spurred interest in ternary LLMs. Despite this, research and practical applications focusing on efficient edge inference for ternary LLMs remain scarce. To bridge this gap, we introduce Bitnet.cpp, an inference system optimized for BitNet b1.58 and ternary LLMs. Given that mixed-precision matrix multiplication (mpGEMM) constitutes the bulk of inference time in ternary LLMs, Bitnet.cpp incorporates a novel mpGEMM library to facilitate sub-2-bits-per-weight, efficient and lossless inference. 
The library features two core solutions: Ternary Lookup Table (TL), which addresses spatial inefficiencies of previous bit-wise methods, and Int2 with a Scale (I2_S), which ensures lossless edge inference, both enabling high-speed inference. Our experiments show that Bitnet.cpp achieves up to a 6.25x increase in speed over full-precision baselines and up to 2.32x over low-bit baselines, setting new benchmarks in the field. Additionally, we expand TL to element-wise lookup table (ELUT) for low-bit LLMs in the appendix, presenting both theoretical and empirical evidence of its considerable potential. Bitnet.cpp is publicly available at https://github.com/microsoft/BitNet/tree/paper , offering a sophisticated solution for the efficient and practical deployment of edge LLMs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11880v1-abstract-full').style.display = 'none'; document.getElementById('2502.11880v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">18 pages, 11 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11817">arXiv:2502.11817</a> <span> [<a href="https://arxiv.org/pdf/2502.11817">pdf</a>, <a href="https://arxiv.org/format/2502.11817">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TLT.2024.3521898">10.1109/TLT.2024.3521898 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> AAKT: Enhancing Knowledge Tracing with Alternate Autoregressive Modeling </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Zhou%2C+H">Hao Zhou</a>, <a href="/search/?searchtype=author&query=Rong%2C+W">Wenge Rong</a>, <a href="/search/?searchtype=author&query=Zhang%2C+J">Jianfei Zhang</a>, <a href="/search/?searchtype=author&query=Sun%2C+Q">Qing Sun</a>, <a href="/search/?searchtype=author&query=Ouyang%2C+Y">Yuanxin Ouyang</a>, <a href="/search/?searchtype=author&query=Xiong%2C+Z">Zhang Xiong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.11817v1-abstract-short" style="display: inline;"> 
Knowledge Tracing (KT) aims to predict students' future performances based on their former exercises and additional information in educational settings. KT has received significant attention since it facilitates personalized experiences in educational situations. Simultaneously, the autoregressive modeling on the sequence of former exercises has been proven effective for this task. One of the prim… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11817v1-abstract-full').style.display = 'inline'; document.getElementById('2502.11817v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11817v1-abstract-full" style="display: none;"> Knowledge Tracing (KT) aims to predict students' future performances based on their former exercises and additional information in educational settings. KT has received significant attention since it facilitates personalized experiences in educational situations. Simultaneously, the autoregressive modeling on the sequence of former exercises has been proven effective for this task. One of the primary challenges in autoregressive modeling for Knowledge Tracing is effectively representing the anterior (pre-response) and posterior (post-response) states of learners across exercises. Existing methods often employ complex model architectures to update learner states using question and response records. In this study, we propose a novel perspective on knowledge tracing task by treating it as a generative process, consistent with the principles of autoregressive models. We demonstrate that knowledge states can be directly represented through autoregressive encodings on a question-response alternate sequence, where model generate the most probable representation in hidden state space by analyzing history interactions. This approach underpins our framework, termed Alternate Autoregressive Knowledge Tracing (AAKT). 
Additionally, we incorporate supplementary educational information, such as question-related skills, into our framework through an auxiliary task, and include extra exercise details, like response time, as additional inputs. Our proposed framework is implemented using advanced autoregressive technologies from Natural Language Generation (NLG) for both training and prediction. Empirical evaluations on four real-world KT datasets indicate that AAKT consistently outperforms all baseline models in terms of AUC, ACC, and RMSE. Furthermore, extensive ablation studies and visualized analysis validate the effectiveness of key components in AAKT. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11817v1-abstract-full').style.display = 'none'; document.getElementById('2502.11817v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE Transactions on Learning Technologies, vol. 18, pp. 
25-38, 2025 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11534">arXiv:2502.11534</a> <span> [<a href="https://arxiv.org/pdf/2502.11534">pdf</a>, <a href="https://arxiv.org/format/2502.11534">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> SurgPose: a Dataset for Articulated Robotic Surgical Tool Pose Estimation and Tracking </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Wu%2C+Z">Zijian Wu</a>, <a href="/search/?searchtype=author&query=Schmidt%2C+A">Adam Schmidt</a>, <a href="/search/?searchtype=author&query=Moore%2C+R">Randy Moore</a>, <a href="/search/?searchtype=author&query=Zhou%2C+H">Haoying Zhou</a>, <a href="/search/?searchtype=author&query=Banks%2C+A">Alexandre Banks</a>, <a href="/search/?searchtype=author&query=Kazanzides%2C+P">Peter Kazanzides</a>, <a href="/search/?searchtype=author&query=Salcudean%2C+S+E">Septimiu E. Salcudean</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.11534v1-abstract-short" style="display: inline;"> Accurate and efficient surgical robotic tool pose estimation is of fundamental significance to downstream applications such as augmented reality (AR) in surgical training and learning-based autonomous manipulation. While significant advancements have been made in pose estimation for humans and animals, it is still a challenge in surgical robotics due to the scarcity of published data. 
The relative… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11534v1-abstract-full').style.display = 'inline'; document.getElementById('2502.11534v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11534v1-abstract-full" style="display: none;"> Accurate and efficient surgical robotic tool pose estimation is of fundamental significance to downstream applications such as augmented reality (AR) in surgical training and learning-based autonomous manipulation. While significant advancements have been made in pose estimation for humans and animals, it is still a challenge in surgical robotics due to the scarcity of published data. The relatively large absolute error of the da Vinci end effector kinematics and arduous calibration procedure make calibrated kinematics data collection expensive. Driven by this limitation, we collected a dataset, dubbed SurgPose, providing instance-aware semantic keypoints and skeletons for visual surgical tool pose estimation and tracking. By marking keypoints using ultraviolet (UV) reactive paint, which is invisible under white light and fluorescent under UV light, we execute the same trajectory under different lighting conditions to collect raw videos and keypoint annotations, respectively. The SurgPose dataset consists of approximately 120k surgical instrument instances (80k for training and 40k for validation) of 6 categories. Each instrument instance is labeled with 7 semantic keypoints. Since the videos are collected in stereo pairs, the 2D pose can be lifted to 3D based on stereo-matching depth. In addition to releasing the dataset, we test a few baseline approaches to surgical instrument tracking to demonstrate the utility of SurgPose. More details can be found at surgpose.github.io. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11534v1-abstract-full').style.display = 'none'; document.getElementById('2502.11534v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ICRA 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11328">arXiv:2502.11328</a> <span> [<a href="https://arxiv.org/pdf/2502.11328">pdf</a>, <a href="https://arxiv.org/format/2502.11328">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="General Relativity and Quantum Cosmology">gr-qc</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Instrumentation and Methods for Astrophysics">astro-ph.IM</span> </div> </div> <p class="title is-5 mathjax"> Progress of the TianQin project </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Luo%2C+J">Jun Luo</a>, <a href="/search/?searchtype=author&query=Bai%2C+S">Shaojun Bai</a>, <a href="/search/?searchtype=author&query=Bai%2C+Y">Yan-Zheng Bai</a>, <a href="/search/?searchtype=author&query=Cai%2C+L">Lin Cai</a>, <a href="/search/?searchtype=author&query=Dang%2C+H">Hao Dang</a>, <a href="/search/?searchtype=author&query=Dong%2C+Q">Qijia Dong</a>, <a href="/search/?searchtype=author&query=Duan%2C+H">Hui-Zong Duan</a>, <a href="/search/?searchtype=author&query=Du%2C+Y">Yuanbo Du</a>, <a href="/search/?searchtype=author&query=Fan%2C+L">Lei Fan</a>, <a 
href="/search/?searchtype=author&query=Fu%2C+X">Xinju Fu</a>, <a href="/search/?searchtype=author&query=Gao%2C+Y">Yong Gao</a>, <a href="/search/?searchtype=author&query=Gou%2C+X">Xingyu Gou</a>, <a href="/search/?searchtype=author&query=Guo%2C+C">Changlei Guo</a>, <a href="/search/?searchtype=author&query=Hong%2C+W">Wei Hong</a>, <a href="/search/?searchtype=author&query=Hu%2C+B">Bin Hu</a>, <a href="/search/?searchtype=author&query=Hu%2C+H">Heran Hu</a>, <a href="/search/?searchtype=author&query=Hu%2C+M">Ming Hu</a>, <a href="/search/?searchtype=author&query=Hu%2C+Y">Yi-Ming Hu</a>, <a href="/search/?searchtype=author&query=Huang%2C+F+P">Fa Peng Huang</a>, <a href="/search/?searchtype=author&query=Gu%2C+D">Defeng Gu</a>, <a href="/search/?searchtype=author&query=Ji%2C+X">Xin Ji</a>, <a href="/search/?searchtype=author&query=Jiang%2C+Y">Yuan-Ze Jiang</a>, <a href="/search/?searchtype=author&query=Li%2C+E">En-Kun Li</a>, <a href="/search/?searchtype=author&query=Li%2C+H">Hongyin Li</a>, <a href="/search/?searchtype=author&query=Li%2C+M">Ming Li</a> , et al. (76 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.11328v1-abstract-short" style="display: inline;"> TianQin is a future space-based gravitational wave observatory targeting the frequency window of $10^{-4}$ Hz $\sim 1$ Hz. A large variety of gravitational wave sources are expected in this frequency band, including the merger of massive black hole binaries, the inspiral of extreme/intermediate mass ratio systems, stellar-mass black hole binaries, Galactic compact binaries, and so on. 
TianQin will… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11328v1-abstract-full').style.display = 'inline'; document.getElementById('2502.11328v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11328v1-abstract-full" style="display: none;"> TianQin is a future space-based gravitational wave observatory targeting the frequency window of $10^{-4}$ Hz $\sim 1$ Hz. A large variety of gravitational wave sources are expected in this frequency band, including the merger of massive black hole binaries, the inspiral of extreme/intermediate mass ratio systems, stellar-mass black hole binaries, Galactic compact binaries, and so on. TianQin will consist of three Earth orbiting satellites on nearly identical orbits with orbital radii of about $10^5$ km. The satellites will form a normal triangle constellation whose plane is nearly perpendicular to the ecliptic plane. The TianQin project has been progressing smoothly following the ``0123" technology roadmap. In step ``0", the TianQin laser ranging station has been constructed and it has successfully ranged to all the five retro-reflectors on the Moon. In step ``1", the drag-free control technology has been tested and demonstrated using the TianQin-1 satellite. In step ``2", the inter-satellite laser interferometry technology will be tested using the pair of TianQin-2 satellites. The TianQin-2 mission has been officially approved and the satellites will be launched around 2026. In step ``3", i.e., the TianQin-3 mission, three identical satellites will be launched around 2035 to form the space-based gravitational wave detector, TianQin, and to start gravitational wave detection in space. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11328v1-abstract-full').style.display = 'none'; document.getElementById('2502.11328v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">45 pages, 3 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11123">arXiv:2502.11123</a> <span> [<a href="https://arxiv.org/pdf/2502.11123">pdf</a>, <a href="https://arxiv.org/format/2502.11123">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> DuplexMamba: Enhancing Real-time Speech Conversations with Duplex and Streaming Capabilities </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Lu%2C+X">Xiangyu Lu</a>, <a href="/search/?searchtype=author&query=Xu%2C+W">Wang Xu</a>, <a href="/search/?searchtype=author&query=Wang%2C+H">Haoyu Wang</a>, <a href="/search/?searchtype=author&query=Zhou%2C+H">Hongyun Zhou</a>, <a href="/search/?searchtype=author&query=Zhao%2C+H">Haiyan Zhao</a>, <a href="/search/?searchtype=author&query=Zhu%2C+C">Conghui Zhu</a>, <a href="/search/?searchtype=author&query=Zhao%2C+T">Tiejun Zhao</a>, <a href="/search/?searchtype=author&query=Yang%2C+M">Muyun Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" 
id="2502.11123v1-abstract-short" style="display: inline;"> Real-time speech conversation is essential for natural and efficient human-machine interactions, requiring duplex and streaming capabilities. Traditional Transformer-based conversational chatbots operate in a turn-based manner and exhibit quadratic computational complexity that grows as the input size increases. In this paper, we propose DuplexMamba, a Mamba-based end-to-end multimodal duplex mode… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11123v1-abstract-full').style.display = 'inline'; document.getElementById('2502.11123v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11123v1-abstract-full" style="display: none;"> Real-time speech conversation is essential for natural and efficient human-machine interactions, requiring duplex and streaming capabilities. Traditional Transformer-based conversational chatbots operate in a turn-based manner and exhibit quadratic computational complexity that grows as the input size increases. In this paper, we propose DuplexMamba, a Mamba-based end-to-end multimodal duplex model for speech-to-text conversation. DuplexMamba enables simultaneous input processing and output generation, dynamically adjusting to support real-time streaming. Specifically, we develop a Mamba-based speech encoder and adapt it with a Mamba-based language model. Furthermore, we introduce a novel duplex decoding strategy that enables DuplexMamba to process input and generate output simultaneously. Experimental results demonstrate that DuplexMamba successfully implements duplex and streaming capabilities while achieving performance comparable to several recently developed Transformer-based models in automatic speech recognition (ASR) tasks and voice assistant benchmark evaluations. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11123v1-abstract-full').style.display = 'none'; document.getElementById('2502.11123v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11047">arXiv:2502.11047</a> <span> [<a href="https://arxiv.org/pdf/2502.11047">pdf</a>, <a href="https://arxiv.org/ps/2502.11047">ps</a>, <a href="https://arxiv.org/format/2502.11047">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="High Energy Physics - Experiment">hep-ex</span> </div> </div> <p class="title is-5 mathjax"> Search for the Cabibbo-suppressed decays $Λ_c^{+}\toΣ^0K^{+}π^{0}$ and $Λ_c^{+}\toΣ^0K^{+}π^{+}π^{-}$ </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=BESIII+Collaboration"> BESIII Collaboration</a>, <a href="/search/?searchtype=author&query=Ablikim%2C+M">M. Ablikim</a>, <a href="/search/?searchtype=author&query=Achasov%2C+M+N">M. N. Achasov</a>, <a href="/search/?searchtype=author&query=Adlarson%2C+P">P. Adlarson</a>, <a href="/search/?searchtype=author&query=Ai%2C+X+C">X. C. Ai</a>, <a href="/search/?searchtype=author&query=Aliberti%2C+R">R. Aliberti</a>, <a href="/search/?searchtype=author&query=Amoroso%2C+A">A. Amoroso</a>, <a href="/search/?searchtype=author&query=An%2C+Q">Q. An</a>, <a href="/search/?searchtype=author&query=Bai%2C+Y">Y. Bai</a>, <a href="/search/?searchtype=author&query=Bakina%2C+O">O. Bakina</a>, <a href="/search/?searchtype=author&query=Ban%2C+Y">Y. 
Ban</a>, <a href="/search/?searchtype=author&query=Bao%2C+H+-">H. -R. Bao</a>, <a href="/search/?searchtype=author&query=Batozskaya%2C+V">V. Batozskaya</a>, <a href="/search/?searchtype=author&query=Begzsuren%2C+K">K. Begzsuren</a>, <a href="/search/?searchtype=author&query=Berger%2C+N">N. Berger</a>, <a href="/search/?searchtype=author&query=Berlowski%2C+M">M. Berlowski</a>, <a href="/search/?searchtype=author&query=Bertani%2C+M">M. Bertani</a>, <a href="/search/?searchtype=author&query=Bettoni%2C+D">D. Bettoni</a>, <a href="/search/?searchtype=author&query=Bianchi%2C+F">F. Bianchi</a>, <a href="/search/?searchtype=author&query=Bianco%2C+E">E. Bianco</a>, <a href="/search/?searchtype=author&query=Bortone%2C+A">A. Bortone</a>, <a href="/search/?searchtype=author&query=Boyko%2C+I">I. Boyko</a>, <a href="/search/?searchtype=author&query=Briere%2C+R+A">R. A. Briere</a>, <a href="/search/?searchtype=author&query=Brueggemann%2C+A">A. Brueggemann</a>, <a href="/search/?searchtype=author&query=Cai%2C+H">H. Cai</a> , et al. (687 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.11047v1-abstract-short" style="display: inline;"> Utilizing 4.5 $fb^{-1}$ of $e^+e^-$ annihilation data collected at center-of-mass energies ranging from 4599.53 MeV to 4698.82 MeV by the BESIII detector at the BEPCII collider, we search for the singly Cabibbo-suppressed hadronic decays $Λ_{c}^{+}\toΣ^{0} K^{+}π^{0}$ and $Λ_{c}^{+}\toΣ^{0}K^{+}π^+π^-$ with a single-tag method. No significant signals are observed for both decays. 
The upper limits on… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11047v1-abstract-full').style.display = 'inline'; document.getElementById('2502.11047v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11047v1-abstract-full" style="display: none;"> Utilizing 4.5 $fb^{-1}$ of $e^+e^-$ annihilation data collected at center-of-mass energies ranging from 4599.53 MeV to 4698.82 MeV by the BESIII detector at the BEPCII collider, we search for the singly Cabibbo-suppressed hadronic decays $Λ_{c}^{+}\toΣ^{0} K^{+}π^{0}$ and $Λ_{c}^{+}\toΣ^{0}K^{+}π^+π^-$ with a single-tag method. No significant signals are observed for both decays. The upper limits on the branching fractions at the $90\%$ confidence level are determined to be $5.0\times 10^{-4}$ for $Λ_{c}^{+}\toΣ^{0} K^{+}π^{0}$ and $6.5\times 10^{-4}$ for $Λ_c^{+}\toΣ^0K^{+}π^{+}π^{-}$. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11047v1-abstract-full').style.display = 'none'; document.getElementById('2502.11047v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 6 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.10241">arXiv:2502.10241</a> <span> [<a href="https://arxiv.org/pdf/2502.10241">pdf</a>, <a href="https://arxiv.org/format/2502.10241">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Instrumentation and Methods for Astrophysics">astro-ph.IM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="High Energy Astrophysical Phenomena">astro-ph.HE</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.3847/1538-4365/adb419">10.3847/1538-4365/adb419 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Extract cleaned Swift/UVOT UV grism spectra with uvotpy package </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Zhou%2C+H">Hao Zhou</a>, <a href="/search/?searchtype=author&query=Covino%2C+S">Stefano Covino</a>, <a href="/search/?searchtype=author&query=Jin%2C+Z">Zhi-Ping Jin</a>, <a href="/search/?searchtype=author&query=Fan%2C+Y">Yi-Zhong Fan</a>, <a href="/search/?searchtype=author&query=Wei%2C+D">Da-Ming Wei</a>, <a href="/search/?searchtype=author&query=Kuin%2C+N+P">N. 
Paul Kuin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.10241v2-abstract-short" style="display: inline;"> The ultraviolet/optical telescope (UVOT) onboard the Neil Gehrels Swift Observatory is capable of imaging with 7 lenticular filters and of taking slitless spectra with 2 grisms. Both image and grism data have been widely used to study gamma-ray bursts, supernovae and other ultraviolet/optical transients, and proved UVOT is a powerful instrument in time-domain astronomy. However, the second order c… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10241v2-abstract-full').style.display = 'inline'; document.getElementById('2502.10241v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.10241v2-abstract-full" style="display: none;"> The ultraviolet/optical telescope (UVOT) onboard the Neil Gehrels Swift Observatory is capable of imaging with 7 lenticular filters and of taking slitless spectra with 2 grisms. Both image and grism data have been widely used to study gamma-ray bursts, supernovae and other ultraviolet/optical transients, and proved UVOT is a powerful instrument in time-domain astronomy. However, the second order contamination, for blue sources, strongly limits the red end of ultraviolet (UV) grism spectra. This, in turn, reduces the valid wavelength range to only about 33% of the total. However, to explore the broadband spectral energy distribution of GRBs at the early stage, a larger valid wavelength range is required. Hence based on the uvotpy package, we propose a method to remove the second order contamination from UV grism spectra (nominal mode) up to about 4000Å, i.e., about 70% of the full wavelength range. The 1-sigma systematic uncertainty of this method is about 11.2%. 
In addition, if a source is red enough, the red end of the valid range could reach about 5000Å. The source code is available on GitHub. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10241v2-abstract-full').style.display = 'none'; document.getElementById('2502.10241v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages, 8 figures, 3 tables. Accepted by the ApJS. Updated the DOI & acknowledgments in V2</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09542">arXiv:2502.09542</a> <span> [<a href="https://arxiv.org/pdf/2502.09542">pdf</a>, <a href="https://arxiv.org/format/2502.09542">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> </div> </div> <p class="title is-5 mathjax"> Constant-Overhead Fault-Tolerant Bell-Pair Distillation using High-Rate Codes </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Ataides%2C+J+P+B">J. 
Pablo Bonilla Ataides</a>, <a href="/search/?searchtype=author&query=Zhou%2C+H">Hengyun Zhou</a>, <a href="/search/?searchtype=author&query=Xu%2C+Q">Qian Xu</a>, <a href="/search/?searchtype=author&query=Baranes%2C+G">Gefen Baranes</a>, <a href="/search/?searchtype=author&query=Li%2C+B">Bikun Li</a>, <a href="/search/?searchtype=author&query=Lukin%2C+M+D">Mikhail D. Lukin</a>, <a href="/search/?searchtype=author&query=Jiang%2C+L">Liang Jiang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09542v1-abstract-short" style="display: inline;"> We present a fault-tolerant Bell-pair distillation scheme achieving constant overhead through high-rate quantum low-density parity-check (qLDPC) codes. Our approach maintains a constant distillation rate equal to the code rate - as high as $1/3$ in our implementations - while requiring no additional overhead beyond the physical qubits of the code. Full circuit-level analysis demonstrates fault-tol… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09542v1-abstract-full').style.display = 'inline'; document.getElementById('2502.09542v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09542v1-abstract-full" style="display: none;"> We present a fault-tolerant Bell-pair distillation scheme achieving constant overhead through high-rate quantum low-density parity-check (qLDPC) codes. Our approach maintains a constant distillation rate equal to the code rate - as high as $1/3$ in our implementations - while requiring no additional overhead beyond the physical qubits of the code. Full circuit-level analysis demonstrates fault-tolerance for input Bell pair infidelities below a threshold $\sim 5\%$, readily achievable with near-term capabilities. 
Unlike previous proposals, our scheme keeps the output Bell pairs encoded in qLDPC codes at each node, eliminating decoding overhead and enabling direct use in distributed quantum applications through recent advances in qLDPC computation. These results establish qLDPC-based distillation as a practical route toward resource-efficient quantum networks and distributed quantum computing. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09542v1-abstract-full').style.display = 'none'; document.getElementById('2502.09542v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, 4 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09346">arXiv:2502.09346</a> <span> [<a href="https://arxiv.org/pdf/2502.09346">pdf</a>, <a href="https://arxiv.org/format/2502.09346">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Data Analysis, Statistics and Probability">physics.data-an</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Fluid Dynamics">physics.flu-dyn</span> </div> </div> <p class="title is-5 mathjax"> Machine learning for modelling unstructured grid data in computational physics: a review </p> <p 
class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Cheng%2C+S">Sibo Cheng</a>, <a href="/search/?searchtype=author&query=Bocquet%2C+M">Marc Bocquet</a>, <a href="/search/?searchtype=author&query=Ding%2C+W">Weiping Ding</a>, <a href="/search/?searchtype=author&query=Finn%2C+T+S">Tobias Sebastian Finn</a>, <a href="/search/?searchtype=author&query=Fu%2C+R">Rui Fu</a>, <a href="/search/?searchtype=author&query=Fu%2C+J">Jinlong Fu</a>, <a href="/search/?searchtype=author&query=Guo%2C+Y">Yike Guo</a>, <a href="/search/?searchtype=author&query=Johnson%2C+E">Eleda Johnson</a>, <a href="/search/?searchtype=author&query=Li%2C+S">Siyi Li</a>, <a href="/search/?searchtype=author&query=Liu%2C+C">Che Liu</a>, <a href="/search/?searchtype=author&query=Moro%2C+E+N">Eric Newton Moro</a>, <a href="/search/?searchtype=author&query=Pan%2C+J">Jie Pan</a>, <a href="/search/?searchtype=author&query=Piggott%2C+M">Matthew Piggott</a>, <a href="/search/?searchtype=author&query=Quilodran%2C+C">Cesar Quilodran</a>, <a href="/search/?searchtype=author&query=Sharma%2C+P">Prakhar Sharma</a>, <a href="/search/?searchtype=author&query=Wang%2C+K">Kun Wang</a>, <a href="/search/?searchtype=author&query=Xiao%2C+D">Dunhui Xiao</a>, <a href="/search/?searchtype=author&query=Xue%2C+X">Xiao Xue</a>, <a href="/search/?searchtype=author&query=Zeng%2C+Y">Yong Zeng</a>, <a href="/search/?searchtype=author&query=Zhang%2C+M">Mingrui Zhang</a>, <a href="/search/?searchtype=author&query=Zhou%2C+H">Hao Zhou</a>, <a href="/search/?searchtype=author&query=Zhu%2C+K">Kewei Zhu</a>, <a href="/search/?searchtype=author&query=Arcucci%2C+R">Rossella Arcucci</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09346v1-abstract-short" style="display: inline;"> Unstructured grid data are essential for modelling complex geometries and dynamics in 
computational physics. Yet, their inherent irregularity presents significant challenges for conventional machine learning (ML) techniques. This paper provides a comprehensive review of advanced ML methodologies designed to handle unstructured grid data in high-dimensional dynamical systems. Key approaches discuss… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09346v1-abstract-full').style.display = 'inline'; document.getElementById('2502.09346v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09346v1-abstract-full" style="display: none;"> Unstructured grid data are essential for modelling complex geometries and dynamics in computational physics. Yet, their inherent irregularity presents significant challenges for conventional machine learning (ML) techniques. This paper provides a comprehensive review of advanced ML methodologies designed to handle unstructured grid data in high-dimensional dynamical systems. Key approaches discussed include graph neural networks, transformer models with spatial attention mechanisms, interpolation-integrated ML methods, and meshless techniques such as physics-informed neural networks. These methodologies have proven effective across diverse fields, including fluid dynamics and environmental simulations. This review is intended as a guidebook for computational scientists seeking to apply ML approaches to unstructured grid data in their domains, as well as for ML researchers looking to address challenges in computational physics. It places special focus on how ML methods can overcome the inherent limitations of traditional numerical techniques and, conversely, how insights from computational physics can inform ML development. To support benchmarking, this review also provides a summary of open-access datasets of unstructured grid data in computational physics. 
Finally, emerging directions such as generative models with unstructured data, reinforcement learning for mesh generation, and hybrid physics-data-driven paradigms are discussed to inspire future advancements in this evolving field. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09346v1-abstract-full').style.display = 'none'; document.getElementById('2502.09346v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.08929">arXiv:2502.08929</a> <span> [<a href="https://arxiv.org/pdf/2502.08929">pdf</a>, <a href="https://arxiv.org/ps/2502.08929">ps</a>, <a href="https://arxiv.org/format/2502.08929">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="High Energy Physics - Experiment">hep-ex</span> </div> </div> <p class="title is-5 mathjax"> Precise Measurement of the $χ_{c0}$ Resonance Parameters and Branching Fractions of $χ_{c0,c2}\toπ^+π^-/K^+K^-$ </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=BESIII+Collaboration"> BESIII Collaboration</a>, <a href="/search/?searchtype=author&query=Ablikim%2C+M">M. Ablikim</a>, <a href="/search/?searchtype=author&query=Achasov%2C+M+N">M. N. Achasov</a>, <a href="/search/?searchtype=author&query=Adlarson%2C+P">P. Adlarson</a>, <a href="/search/?searchtype=author&query=Afedulidis%2C+O">O. Afedulidis</a>, <a href="/search/?searchtype=author&query=Ai%2C+X+C">X. C. Ai</a>, <a href="/search/?searchtype=author&query=Aliberti%2C+R">R. 
Aliberti</a>, <a href="/search/?searchtype=author&query=Amoroso%2C+A">A. Amoroso</a>, <a href="/search/?searchtype=author&query=Bai%2C+Y">Y. Bai</a>, <a href="/search/?searchtype=author&query=Bakina%2C+O">O. Bakina</a>, <a href="/search/?searchtype=author&query=Balossino%2C+I">I. Balossino</a>, <a href="/search/?searchtype=author&query=Ban%2C+Y">Y. Ban</a>, <a href="/search/?searchtype=author&query=Bao%2C+H+-">H. -R. Bao</a>, <a href="/search/?searchtype=author&query=Batozskaya%2C+V">V. Batozskaya</a>, <a href="/search/?searchtype=author&query=Begzsuren%2C+K">K. Begzsuren</a>, <a href="/search/?searchtype=author&query=Berger%2C+N">N. Berger</a>, <a href="/search/?searchtype=author&query=Berlowski%2C+M">M. Berlowski</a>, <a href="/search/?searchtype=author&query=Bertani%2C+M">M. Bertani</a>, <a href="/search/?searchtype=author&query=Bettoni%2C+D">D. Bettoni</a>, <a href="/search/?searchtype=author&query=Bianchi%2C+F">F. Bianchi</a>, <a href="/search/?searchtype=author&query=Bianco%2C+E">E. Bianco</a>, <a href="/search/?searchtype=author&query=Bortone%2C+A">A. Bortone</a>, <a href="/search/?searchtype=author&query=Boyko%2C+I">I. Boyko</a>, <a href="/search/?searchtype=author&query=Briere%2C+R+A">R. A. Briere</a>, <a href="/search/?searchtype=author&query=Brueggemann%2C+A">A. Brueggemann</a> , et al. (648 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.08929v1-abstract-short" style="display: inline;"> By analyzing a $ψ(3686)$ data sample containing $(107.7\pm0.6)\times10^{6}$ events taken with the BESIII detector at the BEPCII storage ring in 2009, the $χ_{c0}$ resonance parameters are precisely measured using $χ_{c0,c2} \to π^+π^-/K^+K^-$ events. 
The mass of $χ_{c0}$ is determined to be $M(χ_{c0})=(3415.67\pm0.07\pm0.06\pm0.07$)~MeV/$c^2$, and its full width is… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08929v1-abstract-full').style.display = 'inline'; document.getElementById('2502.08929v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.08929v1-abstract-full" style="display: none;"> By analyzing a $ψ(3686)$ data sample containing $(107.7\pm0.6)\times10^{6}$ events taken with the BESIII detector at the BEPCII storage ring in 2009, the $χ_{c0}$ resonance parameters are precisely measured using $χ_{c0,c2} \to π^+π^-/K^+K^-$ events. The mass of $χ_{c0}$ is determined to be $M(χ_{c0})=(3415.67\pm0.07\pm0.06\pm0.07$)~MeV/$c^2$, and its full width is $Γ(χ_{c0})=(12.44\pm0.12\pm0.12)~{\rm MeV}$, where the first uncertainty is statistical, the second systematic, and the third for mass comes from $χ_{c2}$ mass uncertainty. These measurements improve the precision of $χ_{c0}$ mass by a factor of four and width by one order of magnitude over the previous individual measurements, and significantly boost our knowledge about the charmonium spectrum. Together with additional $(345.4\pm2.6)\times10^{6}$ $ψ(3686)$ data events taken in 2012, the decay branching fractions of $χ_{c0,c2}\toπ^+π^-/K^+K^-$ are measured as well, with precision improved by a factor of three compared to previous measurements. These $χ_{c0}$ decay branching fractions provide important inputs for the study of glueballs. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08929v1-abstract-full').style.display = 'none'; document.getElementById('2502.08929v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages, 1 figure</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.07671">arXiv:2502.07671</a> <span> [<a href="https://arxiv.org/pdf/2502.07671">pdf</a>, <a href="https://arxiv.org/format/2502.07671">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> </div> </div> <p class="title is-5 mathjax"> Steering Protein Family Design through Profile Bayesian Flow </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Gong%2C+J">Jingjing Gong</a>, <a href="/search/?searchtype=author&query=Pei%2C+Y">Yu Pei</a>, <a href="/search/?searchtype=author&query=Long%2C+S">Siyu Long</a>, <a href="/search/?searchtype=author&query=Song%2C+Y">Yuxuan Song</a>, <a href="/search/?searchtype=author&query=Zhang%2C+Z">Zhe Zhang</a>, <a href="/search/?searchtype=author&query=Huang%2C+W">Wenhao Huang</a>, <a href="/search/?searchtype=author&query=Cao%2C+Z">Ziyao Cao</a>, <a href="/search/?searchtype=author&query=Zhang%2C+S">Shuyi Zhang</a>, <a href="/search/?searchtype=author&query=Zhou%2C+H">Hao Zhou</a>, <a href="/search/?searchtype=author&query=Ma%2C+W">Wei-Ying Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.07671v1-abstract-short" style="display: inline;"> Protein family design emerges as a promising alternative by combining the advantages of de novo protein design and mutation-based directed evolution.In this paper, we propose ProfileBFN, the Profile Bayesian Flow Networks, for specifically generative modeling of protein families. ProfileBFN extends the discrete Bayesian Flow Network from an MSA profile perspective, which can be trained on single p… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07671v1-abstract-full').style.display = 'inline'; document.getElementById('2502.07671v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.07671v1-abstract-full" style="display: none;"> Protein family design emerges as a promising alternative by combining the advantages of de novo protein design and mutation-based directed evolution.In this paper, we propose ProfileBFN, the Profile Bayesian Flow Networks, for specifically generative modeling of protein families. ProfileBFN extends the discrete Bayesian Flow Network from an MSA profile perspective, which can be trained on single protein sequences by regarding it as a degenerate profile, thereby achieving efficient protein family design by avoiding large-scale MSA data construction and training. Empirical results show that ProfileBFN has a profound understanding of proteins. When generating diverse and novel family proteins, it can accurately capture the structural characteristics of the family. The enzyme produced by this method is more likely than the previous approach to have the corresponding function, offering better odds of generating diverse proteins with the desired functionality. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07671v1-abstract-full').style.display = 'none'; document.getElementById('2502.07671v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.07406">arXiv:2502.07406</a> <span> [<a href="https://arxiv.org/pdf/2502.07406">pdf</a>, <a href="https://arxiv.org/format/2502.07406">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="High Energy Physics - Experiment">hep-ex</span> </div> </div> <p class="title is-5 mathjax"> Search for $e^+e^-\to K_S^0 K_S^0 h_c$ </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=BESIII+Collaboration"> BESIII Collaboration</a>, <a href="/search/?searchtype=author&query=Ablikim%2C+M">M. Ablikim</a>, <a href="/search/?searchtype=author&query=Achasov%2C+M+N">M. N. Achasov</a>, <a href="/search/?searchtype=author&query=Adlarson%2C+P">P. Adlarson</a>, <a href="/search/?searchtype=author&query=Afedulidis%2C+O">O. Afedulidis</a>, <a href="/search/?searchtype=author&query=Ai%2C+X+C">X. C. Ai</a>, <a href="/search/?searchtype=author&query=Aliberti%2C+R">R. Aliberti</a>, <a href="/search/?searchtype=author&query=Amoroso%2C+A">A. Amoroso</a>, <a href="/search/?searchtype=author&query=An%2C+Q">Q. An</a>, <a href="/search/?searchtype=author&query=Bai%2C+Y">Y. Bai</a>, <a href="/search/?searchtype=author&query=Bakina%2C+O">O. Bakina</a>, <a href="/search/?searchtype=author&query=Balossino%2C+I">I. Balossino</a>, <a href="/search/?searchtype=author&query=Ban%2C+Y">Y. 
Ban</a>, <a href="/search/?searchtype=author&query=Bao%2C+H+-">H. -R. Bao</a>, <a href="/search/?searchtype=author&query=Batozskaya%2C+V">V. Batozskaya</a>, <a href="/search/?searchtype=author&query=Begzsuren%2C+K">K. Begzsuren</a>, <a href="/search/?searchtype=author&query=Berger%2C+N">N. Berger</a>, <a href="/search/?searchtype=author&query=Berlowski%2C+M">M. Berlowski</a>, <a href="/search/?searchtype=author&query=Bertani%2C+M">M. Bertani</a>, <a href="/search/?searchtype=author&query=Bettoni%2C+D">D. Bettoni</a>, <a href="/search/?searchtype=author&query=Bianchi%2C+F">F. Bianchi</a>, <a href="/search/?searchtype=author&query=Bianco%2C+E">E. Bianco</a>, <a href="/search/?searchtype=author&query=Bortone%2C+A">A. Bortone</a>, <a href="/search/?searchtype=author&query=Boyko%2C+I">I. Boyko</a>, <a href="/search/?searchtype=author&query=Briere%2C+R+A">R. A. Briere</a> , et al. (642 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.07406v1-abstract-short" style="display: inline;"> Using $e^+e^-$ collision data at 13 center-of-mass energies ranging from 4.600 to 4.950 GeV collected with the BESIII detector, we search for the unmeasured $e^+e^-\to K_S^0 K_S^0 h_c$ process . No significant signal is observed, and the upper limits of the Born cross sections at each center-of-mass energy are presented. </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.07406v1-abstract-full" style="display: none;"> Using $e^+e^-$ collision data at 13 center-of-mass energies ranging from 4.600 to 4.950 GeV collected with the BESIII detector, we search for the unmeasured $e^+e^-\to K_S^0 K_S^0 h_c$ process . No significant signal is observed, and the upper limits of the Born cross sections at each center-of-mass energy are presented. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07406v1-abstract-full').style.display = 'none'; document.getElementById('2502.07406v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.07194">arXiv:2502.07194</a> <span> [<a href="https://arxiv.org/pdf/2502.07194">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.3390/electronics13122312">10.3390/electronics13122312 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Dense Object Detection Based on De-homogenized Queries </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Huang%2C+Y">Yueming Huang</a>, <a href="/search/?searchtype=author&query=Ma%2C+C">Chenrui Ma</a>, <a href="/search/?searchtype=author&query=Zhou%2C+H">Hao Zhou</a>, <a href="/search/?searchtype=author&query=Wu%2C+H">Hao Wu</a>, <a href="/search/?searchtype=author&query=Yuan%2C+G">Guowu Yuan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" 
id="2502.07194v1-abstract-short" style="display: inline;"> Dense object detection is widely used in automatic driving, video surveillance, and other fields. This paper focuses on the challenging task of dense object detection. Currently, detection methods based on greedy algorithms, such as non-maximum suppression (NMS), often produce many repetitive predictions or missed detections in dense scenarios, which is a common problem faced by NMS-based algorith… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07194v1-abstract-full').style.display = 'inline'; document.getElementById('2502.07194v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.07194v1-abstract-full" style="display: none;"> Dense object detection is widely used in automatic driving, video surveillance, and other fields. This paper focuses on the challenging task of dense object detection. Currently, detection methods based on greedy algorithms, such as non-maximum suppression (NMS), often produce many repetitive predictions or missed detections in dense scenarios, which is a common problem faced by NMS-based algorithms. Through the end-to-end DETR (DEtection TRansformer), as a type of detector that can incorporate the post-processing de-duplication capability of NMS, etc., into the network, we found that homogeneous queries in the query-based detector lead to a reduction in the de-duplication capability of the network and the learning efficiency of the encoder, resulting in duplicate prediction and missed detection problems. To solve this problem, we propose learnable differentiated encoding to de-homogenize the queries, and at the same time, queries can communicate with each other via differentiated encoding information, replacing the previous self-attention among the queries. 
In addition, we used joint loss on the output of the encoder that considered both location and confidence prediction to give a higher-quality initialization for queries. Without cumbersome decoder stacking and guaranteeing accuracy, our proposed end-to-end detection framework was more concise and reduced the number of parameters by about 8% compared to deformable DETR. Our method achieved excellent results on the challenging CrowdHuman dataset with 93.6% average precision (AP), 39.2% MR-2, and 84.3% JI. The performance overperformed previous SOTA methods, such as Iter-E2EDet (Progressive End-to-End Object Detection) and MIP (One proposal, Multiple predictions). In addition, our method is more robust in various scenarios with different densities. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07194v1-abstract-full').style.display = 'none'; document.getElementById('2502.07194v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages, 15 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.07179">arXiv:2502.07179</a> <span> [<a href="https://arxiv.org/pdf/2502.07179">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.3934/era.2024131">10.3934/era.2024131 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Improved YOLOv7 model for insulator defect detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Wang%2C+Z">Zhenyue Wang</a>, <a href="/search/?searchtype=author&query=Yuan%2C+G">Guowu Yuan</a>, <a href="/search/?searchtype=author&query=Zhou%2C+H">Hao Zhou</a>, <a href="/search/?searchtype=author&query=Ma%2C+Y">Yi Ma</a>, <a href="/search/?searchtype=author&query=Ma%2C+Y">Yutang Ma</a>, <a href="/search/?searchtype=author&query=Chen%2C+D">Dong Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.07179v1-abstract-short" style="display: inline;"> Insulators are crucial insulation components and structural supports in power grids, playing a vital role in the transmission lines. 
Due to temperature fluctuations, internal stress, or damage from hail, insulators are prone to injury. Automatic detection of damaged insulators faces challenges such as diverse types, small defect targets, and complex backgrounds and shapes. Most research for detect… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07179v1-abstract-full').style.display = 'inline'; document.getElementById('2502.07179v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.07179v1-abstract-full" style="display: none;"> Insulators are crucial insulation components and structural supports in power grids, playing a vital role in the transmission lines. Due to temperature fluctuations, internal stress, or damage from hail, insulators are prone to injury. Automatic detection of damaged insulators faces challenges such as diverse types, small defect targets, and complex backgrounds and shapes. Most research for detecting insulator defects has focused on a single defect type or a specific material. However, the insulators in the grid's transmission lines have different colors and materials. Various insulator defects coexist, and the existing methods have difficulty meeting the practical application requirements. Current methods suffer from low detection accuracy and mAP0.5 cannot meet application requirements. This paper proposes an improved YOLOv7 model for multi-type insulator defect detection. First, our model replaces the SPPCSPC module with the RFB module to enhance the network's feature extraction capability. Second, a CA mechanism is introduced into the head part to enhance the network's feature representation ability and to improve detection accuracy. Third, a WIoU loss function is employed to address the low-quality samples hindering model generalization during training, thereby improving the model's overall performance. 
The experimental results indicate that the proposed model exhibits enhancements across various performance metrics. Specifically, there is a 1.6% advancement in mAP_0.5, a corresponding 1.6% enhancement in mAP_0.5:0.95, a 1.3% elevation in precision, and a 1% increase in recall. Moreover, the model achieves parameter reduction by 3.2 million, leading to a decrease of 2.5 GFLOPS in computational cost. Notably, there is also an improvement of 2.81 milliseconds in single-image detection speed. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07179v1-abstract-full').style.display = 'none'; document.getElementById('2502.07179v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">19 pages, 13 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.07175">arXiv:2502.07175</a> <span> [<a href="https://arxiv.org/pdf/2502.07175">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.3390/app132312775">10.3390/app132312775 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p 
class="title is-5 mathjax"> Foreign-Object Detection in High-Voltage Transmission Line Based on Improved YOLOv8m </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Wang%2C+Z">Zhenyue Wang</a>, <a href="/search/?searchtype=author&query=Yuan%2C+G">Guowu Yuan</a>, <a href="/search/?searchtype=author&query=Zhou%2C+H">Hao Zhou</a>, <a href="/search/?searchtype=author&query=Ma%2C+Y">Yi Ma</a>, <a href="/search/?searchtype=author&query=Ma%2C+Y">Yutang Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.07175v1-abstract-short" style="display: inline;"> The safe operation of high-voltage transmission lines ensures the power grid's security. Various foreign objects attached to the transmission lines, such as balloons, kites and nesting birds, can significantly affect the safe and stable operation of high-voltage transmission lines. With the advancement of computer vision technology, periodic automatic inspection of foreign objects is efficient and… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07175v1-abstract-full').style.display = 'inline'; document.getElementById('2502.07175v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.07175v1-abstract-full" style="display: none;"> The safe operation of high-voltage transmission lines ensures the power grid's security. Various foreign objects attached to the transmission lines, such as balloons, kites and nesting birds, can significantly affect the safe and stable operation of high-voltage transmission lines. With the advancement of computer vision technology, periodic automatic inspection of foreign objects is efficient and necessary. 
Existing detection methods have low accuracy because foreign objects attached to the transmission lines are complex, including occlusions, diverse object types, significant scale variations, and complex backgrounds. In response to the practical needs of the Yunnan Branch of China Southern Power Grid Co., Ltd., this paper proposes an improved YOLOv8m-based model for detecting foreign objects on transmission lines. Experiments are conducted on a dataset collected from Yunnan Power Grid. The proposed model enhances the original YOLOv8m by incorporating a Global Attention Module (GAM) into the backbone to focus on occluded foreign objects, replacing the SPPF module with the SPPCSPC module to augment the model's multiscale feature extraction capability, and introducing the Focal-EIoU loss function to address the issue of high- and low-quality sample imbalances. These improvements accelerate model convergence and enhance detection accuracy. The experimental results demonstrate that our proposed model achieves a 2.7% increase in mAP_0.5, a 4% increase in mAP_0.5:0.95, and a 6% increase in recall. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07175v1-abstract-full').style.display = 'none'; document.getElementById('2502.07175v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">24 pages, 16 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.06289">arXiv:2502.06289</a> <span> [<a href="https://arxiv.org/pdf/2502.06289">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Is an Ultra Large Natural Image-Based Foundation Model Superior to a Retina-Specific Model for Detecting Ocular and Systemic Diseases? </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Hou%2C+Q">Qingshan Hou</a>, <a href="/search/?searchtype=author&query=Zhou%2C+Y">Yukun Zhou</a>, <a href="/search/?searchtype=author&query=Goh%2C+J+H+L">Jocelyn Hui Lin Goh</a>, <a href="/search/?searchtype=author&query=Zou%2C+K">Ke Zou</a>, <a href="/search/?searchtype=author&query=Yew%2C+S+M+E">Samantha Min Er Yew</a>, <a href="/search/?searchtype=author&query=Srinivasan%2C+S">Sahana Srinivasan</a>, <a href="/search/?searchtype=author&query=Wang%2C+M">Meng Wang</a>, <a href="/search/?searchtype=author&query=Lo%2C+T">Thaddaeus Lo</a>, <a href="/search/?searchtype=author&query=Lei%2C+X">Xiaofeng Lei</a>, <a href="/search/?searchtype=author&query=Wagner%2C+S+K">Siegfried K. Wagner</a>, <a href="/search/?searchtype=author&query=Chia%2C+M+A">Mark A. 
Chia</a>, <a href="/search/?searchtype=author&query=Yang%2C+D">Dawei Yang</a>, <a href="/search/?searchtype=author&query=Jiang%2C+H">Hongyang Jiang</a>, <a href="/search/?searchtype=author&query=Ran%2C+A">AnRan Ran</a>, <a href="/search/?searchtype=author&query=Santos%2C+R">Rui Santos</a>, <a href="/search/?searchtype=author&query=Somfai%2C+G+M">Gabor Mark Somfai</a>, <a href="/search/?searchtype=author&query=Zhou%2C+J+H">Juan Helen Zhou</a>, <a href="/search/?searchtype=author&query=Chen%2C+H">Haoyu Chen</a>, <a href="/search/?searchtype=author&query=Chen%2C+Q">Qingyu Chen</a>, <a href="/search/?searchtype=author&query=Cheung%2C+C+Y">Carol Yim-Lui Cheung</a>, <a href="/search/?searchtype=author&query=Keane%2C+P+A">Pearse A. Keane</a>, <a href="/search/?searchtype=author&query=Tham%2C+Y+C">Yih Chung Tham</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.06289v1-abstract-short" style="display: inline;"> The advent of foundation models (FMs) is transforming medical domain. In ophthalmology, RETFound, a retina-specific FM pre-trained sequentially on 1.4 million natural images and 1.6 million retinal images, has demonstrated high adaptability across clinical applications. Conversely, DINOv2, a general-purpose vision FM pre-trained on 142 million natural images, has shown promise in non-medical domai… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06289v1-abstract-full').style.display = 'inline'; document.getElementById('2502.06289v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.06289v1-abstract-full" style="display: none;"> The advent of foundation models (FMs) is transforming medical domain. 
In ophthalmology, RETFound, a retina-specific FM pre-trained sequentially on 1.4 million natural images and 1.6 million retinal images, has demonstrated high adaptability across clinical applications. Conversely, DINOv2, a general-purpose vision FM pre-trained on 142 million natural images, has shown promise in non-medical domains. However, its applicability to clinical tasks remains underexplored. To address this, we conducted head-to-head evaluations by fine-tuning RETFound and three DINOv2 models (large, base, small) for ocular disease detection and systemic disease prediction tasks, across eight standardized open-source ocular datasets, as well as the Moorfields AlzEye and the UK Biobank datasets. DINOv2-large model outperformed RETFound in detecting diabetic retinopathy (AUROC=0.850-0.952 vs 0.823-0.944, across three datasets, all P<=0.007) and multi-class eye diseases (AUROC=0.892 vs. 0.846, P<0.001). In glaucoma, DINOv2-base model outperformed RETFound (AUROC=0.958 vs 0.940, P<0.001). Conversely, RETFound achieved superior performance over all DINOv2 models in predicting heart failure, myocardial infarction, and ischaemic stroke (AUROC=0.732-0.796 vs 0.663-0.771, all P<0.001). These trends persisted even with 10% of the fine-tuning data. These findings showcase the distinct scenarios where general-purpose and domain-specific FMs excel, highlighting the importance of aligning FM selection with task-specific requirements to optimise clinical performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06289v1-abstract-full').style.display = 'none'; document.getElementById('2502.06289v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.06178">arXiv:2502.06178</a> <span> [<a href="https://arxiv.org/pdf/2502.06178">pdf</a>, <a href="https://arxiv.org/format/2502.06178">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Bayesian Optimization by Kernel Regression and Density-based Exploration </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Zhu%2C+T">Tansheng Zhu</a>, <a href="/search/?searchtype=author&query=Zhou%2C+H">Hongyu Zhou</a>, <a href="/search/?searchtype=author&query=Jin%2C+K">Ke Jin</a>, <a href="/search/?searchtype=author&query=Xu%2C+X">Xusheng Xu</a>, <a href="/search/?searchtype=author&query=Yuan%2C+Q">Qiufan Yuan</a>, <a href="/search/?searchtype=author&query=Ji%2C+L">Lijie Ji</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.06178v1-abstract-short" style="display: inline;"> Bayesian optimization is highly effective for optimizing expensive-to-evaluate black-box functions, but it faces significant computational challenges due to the high computational complexity of Gaussian processes, which results in a total time complexity that is quartic with respect to the number of iterations. 
To address this limitation, we propose the Bayesian Optimization by Kernel regression a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06178v1-abstract-full').style.display = 'inline'; document.getElementById('2502.06178v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.06178v1-abstract-full" style="display: none;"> Bayesian optimization is highly effective for optimizing expensive-to-evaluate black-box functions, but it faces significant computational challenges due to the high computational complexity of Gaussian processes, which results in a total time complexity that is quartic with respect to the number of iterations. To address this limitation, we propose the Bayesian Optimization by Kernel regression and density-based Exploration (BOKE) algorithm. BOKE uses kernel regression for efficient function approximation, kernel density for exploration, and the improved kernel regression upper confidence bound criteria to guide the optimization process, thus reducing computational costs to quadratic. Our theoretical analysis rigorously establishes the global convergence of BOKE and ensures its robustness. Through extensive numerical experiments on both synthetic and real-world optimization tasks, we demonstrate that BOKE not only performs competitively compared to Gaussian process-based methods but also exhibits superior computational efficiency. These results highlight BOKE's effectiveness in resource-constrained environments, providing a practical approach for optimization problems in engineering applications. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06178v1-abstract-full').style.display = 'none'; document.getElementById('2502.06178v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.06127">arXiv:2502.06127</a> <span> [<a href="https://arxiv.org/pdf/2502.06127">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.3934/mbe.2023334">10.3934/mbe.2023334 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Improved YOLOv5s model for key components detection of power transmission lines </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Chen%2C+C">Chen Chen</a>, <a href="/search/?searchtype=author&query=Yuan%2C+G">Guowu Yuan</a>, <a href="/search/?searchtype=author&query=Zhou%2C+H">Hao Zhou</a>, <a href="/search/?searchtype=author&query=Ma%2C+Y">Yi Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.06127v1-abstract-short" style="display: inline;"> High-voltage 
transmission lines are located far from the road, resulting in inconvenient inspection work and rising maintenance costs. Intelligent inspection of power transmission lines has become increasingly important. However, subsequent intelligent inspection relies on accurately detecting various key components. Due to the low detection accuracy of key components in transmission line image in… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06127v1-abstract-full').style.display = 'inline'; document.getElementById('2502.06127v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.06127v1-abstract-full" style="display: none;"> High-voltage transmission lines are located far from the road, resulting in inconvenient inspection work and rising maintenance costs. Intelligent inspection of power transmission lines has become increasingly important. However, subsequent intelligent inspection relies on accurately detecting various key components. Due to the low detection accuracy of key components in transmission line image inspection, this paper proposed an improved object detection model based on the YOLOv5s (You Only Look Once Version 5 Small) model to improve the detection accuracy of key components of transmission lines. According to the characteristics of the power grid inspection image, we first modify the distance measurement in the k-means clustering to improve the anchor matching of the YOLOv5s model. Then, we add the convolutional block attention module (CBAM) attention mechanism to the backbone network to improve accuracy. Finally, we apply the focal loss function to reduce the impact of class imbalance. Our improved method's mAP (mean average precision) reached 98.1%, the precision reached 97.5%, the recall reached 94.4%, and the detection rate reached 84.8 FPS (frames per second). 
The experimental results show that our improved model improves detection accuracy and has performance advantages over other models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06127v1-abstract-full').style.display = 'none'; document.getElementById('2502.06127v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">23 pages, 14 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.06119">arXiv:2502.06119</a> <span> [<a href="https://arxiv.org/pdf/2502.06119">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.3390/electronics11142182">10.3390/electronics11142182 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> An Appearance Defect Detection Method for Cigarettes Based on C-CenterNet </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Liu%2C+H">Hongyu Liu</a>, <a href="/search/?searchtype=author&query=Yuan%2C+G">Guowu Yuan</a>, <a href="/search/?searchtype=author&query=Yang%2C+L">Lei Yang</a>, <a href="/search/?searchtype=author&query=Liu%2C+K">Kunxiao Liu</a>, <a 
href="/search/?searchtype=author&query=Zhou%2C+H">Hao Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.06119v1-abstract-short" style="display: inline;"> Due to the poor adaptability of traditional methods in the cigarette detection task on the automatic cigarette production line, it is difficult to accurately identify whether a cigarette has defects and the types of defects; thus, a cigarette appearance defect detection method based on C-CenterNet is proposed. This detector uses keypoint estimation to locate center points and regresses all other d… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06119v1-abstract-full').style.display = 'inline'; document.getElementById('2502.06119v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.06119v1-abstract-full" style="display: none;"> Due to the poor adaptability of traditional methods in the cigarette detection task on the automatic cigarette production line, it is difficult to accurately identify whether a cigarette has defects and the types of defects; thus, a cigarette appearance defect detection method based on C-CenterNet is proposed. This detector uses keypoint estimation to locate center points and regresses all other defect properties. Firstly, Resnet50 is used as the backbone feature extraction network, and the convolutional block attention mechanism (CBAM) is introduced to enhance the network's ability to extract effective features and reduce the interference of non-target information. At the same time, the feature pyramid network is used to enhance the feature extraction of each layer. Then, deformable convolution is used to replace part of the common convolution to enhance the learning ability of different shape defects. 
Finally, the activation function ACON (ActivateOrNot) is used instead of the ReLU activation function, and the activation operation of some neurons is adaptively selected to improve the detection accuracy of the network. The experimental results are mainly acquired via the mean Average Precision (mAP). The experimental results show that the mAP of the C-CenterNet model applied in the cigarette appearance defect detection task is 95.01%. Compared with the original CenterNet model, the model's success rate is increased by 6.14%, so it can meet the requirements of precision and adaptability in cigarette detection tasks on the automatic cigarette production line. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06119v1-abstract-full').style.display = 'none'; document.getElementById('2502.06119v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">19 pages, 14 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05873">arXiv:2502.05873</a> <span> [<a href="https://arxiv.org/pdf/2502.05873">pdf</a>, <a href="https://arxiv.org/ps/2502.05873">ps</a>, <a href="https://arxiv.org/format/2502.05873">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Combinatorics">math.CO</span> </div> </div> <p class="title is-5 mathjax"> Oriented diameter of the complete tripartite graph (II) </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Liu%2C+J">Jing Liu</a>, <a href="/search/?searchtype=author&query=Zhou%2C+H">Hui Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05873v1-abstract-short" style="display: inline;"> For a graph $G$, let $\mathbb{D}(G)$ denote the set of all strong orientations of $G$, and the oriented diameter of $G$ is $f(G)=\min \{diam(D) \mid D \in \mathbb{D}(G)\}$, which is the minimum value of the diameters $diam(D)$ where $D \in \mathbb{D}(G)$. 
In this paper, we determine the oriented diameter of complete tripartite graphs $K(3,3, q)$ and $K(3,4, q)$, these are special cases that arise… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05873v1-abstract-full').style.display = 'inline'; document.getElementById('2502.05873v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05873v1-abstract-full" style="display: none;"> For a graph $G$, let $\mathbb{D}(G)$ denote the set of all strong orientations of $G$, and the oriented diameter of $G$ is $f(G)=\min \{diam(D) \mid D \in \mathbb{D}(G)\}$, which is the minimum value of the diameters $diam(D)$ where $D \in \mathbb{D}(G)$. In this paper, we determine the oriented diameter of complete tripartite graphs $K(3,3, q)$ and $K(3,4, q)$, these are special cases that arise in determining the oriented diameter of $K(3, p, q)$. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05873v1-abstract-full').style.display = 'none'; document.getElementById('2502.05873v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 05C20; 05C12 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05433">arXiv:2502.05433</a> <span> [<a href="https://arxiv.org/pdf/2502.05433">pdf</a>, <a href="https://arxiv.org/format/2502.05433">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> AdaFlow: Efficient Long Video Editing via Adaptive Attention Slimming And Keyframe Selection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Zhang%2C+S">Shuheng Zhang</a>, <a href="/search/?searchtype=author&query=Liu%2C+Y">Yuqi Liu</a>, <a href="/search/?searchtype=author&query=Zhou%2C+H">Hongbo Zhou</a>, <a href="/search/?searchtype=author&query=Peng%2C+J">Jun Peng</a>, <a href="/search/?searchtype=author&query=Zhou%2C+Y">Yiyi Zhou</a>, <a href="/search/?searchtype=author&query=Sun%2C+X">Xiaoshuai Sun</a>, <a href="/search/?searchtype=author&query=Ji%2C+R">Rongrong Ji</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05433v1-abstract-short" style="display: inline;"> Despite great progress, text-driven long video editing is still notoriously challenging mainly due to excessive memory overhead. Although recent efforts have simplified this task into a two-step process of keyframe translation and interpolation generation, the token-wise keyframe translation still plagues the upper limit of video length. 
In this paper, we propose a novel and training-free approach… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05433v1-abstract-full').style.display = 'inline'; document.getElementById('2502.05433v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05433v1-abstract-full" style="display: none;"> Despite great progress, text-driven long video editing is still notoriously challenging mainly due to excessive memory overhead. Although recent efforts have simplified this task into a two-step process of keyframe translation and interpolation generation, the token-wise keyframe translation still plagues the upper limit of video length. In this paper, we propose a novel and training-free approach towards efficient and effective long video editing, termed AdaFlow. We first reveal that not all tokens of video frames hold equal importance for keyframe translation, based on which we propose an Adaptive Attention Slimming scheme for AdaFlow to squeeze the $KV$ sequence, thus increasing the number of keyframes for translations by an order of magnitude. In addition, an Adaptive Keyframe Selection scheme is also equipped to select the representative frames for joint editing, further improving generation quality. With these innovative designs, AdaFlow achieves high-quality long video editing of minutes in one inference, i.e., more than 1$k$ frames on one A800 GPU, which is about ten times longer than the compared methods, e.g., TokenFlow. To validate AdaFlow, we also build a new benchmark for long video editing with high-quality annotations, termed LongV-EVAL. Our code is released at: https://github.com/jidantang55/AdaFlow. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05433v1-abstract-full').style.display = 'none'; document.getElementById('2502.05433v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.04848">arXiv:2502.04848</a> <span> [<a href="https://arxiv.org/pdf/2502.04848">pdf</a>, <a href="https://arxiv.org/format/2502.04848">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="High Energy Astrophysical Phenomena">astro-ph.HE</span> </div> </div> <p class="title is-5 mathjax"> Broadband $γ$-ray spectrum of supernova remnant Cassiopeia A </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Cao%2C+Z">Zhen Cao</a>, <a href="/search/?searchtype=author&query=Aharonian%2C+F">F. Aharonian</a>, <a href="/search/?searchtype=author&query=Bai%2C+Y+X">Y. X. Bai</a>, <a href="/search/?searchtype=author&query=Bao%2C+Y+W">Y. W. Bao</a>, <a href="/search/?searchtype=author&query=Bastieri%2C+D">D. Bastieri</a>, <a href="/search/?searchtype=author&query=Bi%2C+X+J">X. J. Bi</a>, <a href="/search/?searchtype=author&query=Bi%2C+Y+J">Y. J. Bi</a>, <a href="/search/?searchtype=author&query=Bian%2C+W">W. Bian</a>, <a href="/search/?searchtype=author&query=Bukevich%2C+A+V">A. V. Bukevich</a>, <a href="/search/?searchtype=author&query=Cai%2C+C+M">C. M. Cai</a>, <a href="/search/?searchtype=author&query=Cao%2C+W+Y">W. Y. Cao</a>, <a href="/search/?searchtype=author&query=Cao%2C+Z">Zhe Cao</a>, <a href="/search/?searchtype=author&query=Chang%2C+J">J. 
Chang</a>, <a href="/search/?searchtype=author&query=Chang%2C+J+F">J. F. Chang</a>, <a href="/search/?searchtype=author&query=Chen%2C+A+M">A. M. Chen</a>, <a href="/search/?searchtype=author&query=Chen%2C+E+S">E. S. Chen</a>, <a href="/search/?searchtype=author&query=Chen%2C+H+X">H. X. Chen</a>, <a href="/search/?searchtype=author&query=Chen%2C+L">Liang Chen</a>, <a href="/search/?searchtype=author&query=Chen%2C+L">Long Chen</a>, <a href="/search/?searchtype=author&query=Chen%2C+M+J">M. J. Chen</a>, <a href="/search/?searchtype=author&query=Chen%2C+M+L">M. L. Chen</a>, <a href="/search/?searchtype=author&query=Chen%2C+Q+H">Q. H. Chen</a>, <a href="/search/?searchtype=author&query=Chen%2C+S">S. Chen</a>, <a href="/search/?searchtype=author&query=Chen%2C+S+H">S. H. Chen</a>, <a href="/search/?searchtype=author&query=Chen%2C+S+Z">S. Z. Chen</a> , et al. (293 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.04848v1-abstract-short" style="display: inline;"> The core-collapse supernova remnant (SNR) Cassiopeia A (Cas A) is one of the brightest galactic radio sources with an angular radius of $\sim$ 2.5 $\arcmin$. 
Although no extension of this source has been detected in the $γ$-ray band, using more than 1000 days of LHAASO data above $\sim 0.8$ TeV, we find that its spectrum is significantly softer than those obtained with Imaging Air Cherenkov Telesc… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04848v1-abstract-full').style.display = 'inline'; document.getElementById('2502.04848v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.04848v1-abstract-full" style="display: none;"> The core-collapse supernova remnant (SNR) Cassiopeia A (Cas A) is one of the brightest galactic radio sources with an angular radius of $\sim$ 2.5 $\arcmin$. Although no extension of this source has been detected in the $γ$-ray band, using more than 1000 days of LHAASO data above $\sim 0.8$ TeV, we find that its spectrum is significantly softer than those obtained with Imaging Air Cherenkov Telescopes (IACTs) and its flux near $\sim 1$ TeV is about two times higher. In combination with analyses of more than 16 years of \textit{Fermi}-LAT data covering $0.1 \, \mathrm{GeV} - 1 \, \mathrm{TeV}$, we find that the spectrum above 30 GeV deviates significantly from a single power-law, and is best described by a smoothly broken power-law with a spectral index of $1.90 \pm 0.15_\mathrm{stat}$ ($3.41 \pm 0.19_\mathrm{stat}$) below (above) a break energy of $0.63 \pm 0.21_\mathrm{stat} \, \mathrm{TeV}$. Given differences in the angular resolution of LHAASO-WCDA and IACTs, TeV $γ$-ray emission detected with LHAASO may have a significant contribution from regions surrounding the SNR illuminated by particles accelerated earlier, which, however, are treated as background by IACTs. Detailed modelling can be used to constrain acceleration processes of TeV particles in the early stage of SNR evolution. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04848v1-abstract-full').style.display = 'none'; document.getElementById('2502.04848v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.04537">arXiv:2502.04537</a> <span> [<a href="https://arxiv.org/pdf/2502.04537">pdf</a>, <a href="https://arxiv.org/format/2502.04537">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Multilingual Non-Autoregressive Machine Translation without Knowledge Distillation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Huang%2C+C">Chenyang Huang</a>, <a href="/search/?searchtype=author&query=Huang%2C+F">Fei Huang</a>, <a href="/search/?searchtype=author&query=Zheng%2C+Z">Zaixiang Zheng</a>, <a href="/search/?searchtype=author&query=Za%C3%AFane%2C+O+R">Osmar R. Za茂ane</a>, <a href="/search/?searchtype=author&query=Zhou%2C+H">Hao Zhou</a>, <a href="/search/?searchtype=author&query=Mou%2C+L">Lili Mou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.04537v1-abstract-short" style="display: inline;"> Multilingual neural machine translation (MNMT) aims at using one single model for multiple translation directions. 
Recent work applies non-autoregressive Transformers to improve the efficiency of MNMT, but requires expensive knowledge distillation (KD) processes. To this end, we propose an M-DAT approach to non-autoregressive multilingual machine translation. Our system leverages the recent advanc… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04537v1-abstract-full').style.display = 'inline'; document.getElementById('2502.04537v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.04537v1-abstract-full" style="display: none;"> Multilingual neural machine translation (MNMT) aims at using one single model for multiple translation directions. Recent work applies non-autoregressive Transformers to improve the efficiency of MNMT, but requires expensive knowledge distillation (KD) processes. To this end, we propose an M-DAT approach to non-autoregressive multilingual machine translation. Our system leverages the recent advance of the directed acyclic Transformer (DAT), which does not require KD. We further propose a pivot back-translation (PivotBT) approach to improve the generalization to unseen translation directions. Experiments show that our M-DAT achieves state-of-the-art performance in non-autoregressive MNMT. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04537v1-abstract-full').style.display = 'none'; document.getElementById('2502.04537v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">In Findings of the Association for Computational Linguistics: IJCNLP-AACL 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.04535">arXiv:2502.04535</a> <span> [<a href="https://arxiv.org/pdf/2502.04535">pdf</a>, <a href="https://arxiv.org/format/2502.04535">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> A Decoding Algorithm for Length-Control Summarization Based on Directed Acyclic Transformers </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Huang%2C+C">Chenyang Huang</a>, <a href="/search/?searchtype=author&query=Zhou%2C+H">Hao Zhou</a>, <a href="/search/?searchtype=author&query=Jen%2C+C">Cameron Jen</a>, <a href="/search/?searchtype=author&query=Zheng%2C+K">Kangjie Zheng</a>, <a href="/search/?searchtype=author&query=Za%C3%AFane%2C+O+R">Osmar R. Za茂ane</a>, <a href="/search/?searchtype=author&query=Mou%2C+L">Lili Mou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.04535v1-abstract-short" style="display: inline;"> Length-control summarization aims to condense long texts into a short one within a certain length limit. Previous approaches often use autoregressive (AR) models and treat the length requirement as a soft constraint, which may not always be satisfied. In this study, we propose a novel length-control decoding algorithm based on the Directed Acyclic Transformer (DAT). 
Our approach allows for multipl… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04535v1-abstract-full').style.display = 'inline'; document.getElementById('2502.04535v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.04535v1-abstract-full" style="display: none;"> Length-control summarization aims to condense long texts into a short one within a certain length limit. Previous approaches often use autoregressive (AR) models and treat the length requirement as a soft constraint, which may not always be satisfied. In this study, we propose a novel length-control decoding algorithm based on the Directed Acyclic Transformer (DAT). Our approach allows for multiple plausible sequence fragments and predicts a \emph{path} to connect them. In addition, we propose a Sequence Maximum a Posteriori (SeqMAP) decoding algorithm that marginalizes different possible paths and finds the most probable summary satisfying the length budget. Our algorithm is based on beam search, which further facilitates a reranker for performance improvement. Experimental results on the Gigaword and DUC2004 datasets demonstrate our state-of-the-art performance for length-control summarization. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04535v1-abstract-full').style.display = 'none'; document.getElementById('2502.04535v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Findings of the Association for Computational Linguistics: EMNLP 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.04498">arXiv:2502.04498</a> <span> [<a href="https://arxiv.org/pdf/2502.04498">pdf</a>, <a href="https://arxiv.org/format/2502.04498">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Verifiable Format Control for Large Language Model Generations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Wang%2C+Z">Zhaoyang Wang</a>, <a href="/search/?searchtype=author&query=Jiang%2C+J">Jinqi Jiang</a>, <a href="/search/?searchtype=author&query=Zhou%2C+H">Huichi Zhou</a>, <a href="/search/?searchtype=author&query=Zheng%2C+W">Wenhao Zheng</a>, <a href="/search/?searchtype=author&query=Zhang%2C+X">Xuchao Zhang</a>, <a href="/search/?searchtype=author&query=Bansal%2C+C">Chetan Bansal</a>, <a href="/search/?searchtype=author&query=Yao%2C+H">Huaxiu Yao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.04498v1-abstract-short" style="display: inline;"> Recent Large Language Models (LLMs) have demonstrated satisfying general instruction following ability. However, small LLMs with about 7B parameters still struggle fine-grained format following (e.g., JSON format), which seriously hinder the advancements of their applications. 
Most existing methods focus on benchmarking general instruction following while overlook how to improve the specific forma… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04498v1-abstract-full').style.display = 'inline'; document.getElementById('2502.04498v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.04498v1-abstract-full" style="display: none;"> Recent Large Language Models (LLMs) have demonstrated satisfying general instruction following ability. However, small LLMs with about 7B parameters still struggle fine-grained format following (e.g., JSON format), which seriously hinder the advancements of their applications. Most existing methods focus on benchmarking general instruction following while overlook how to improve the specific format following ability for small LLMs. Besides, these methods often rely on evaluations based on advanced LLMs (e.g., GPT-4), which can introduce the intrinsic bias of LLMs and be costly due to the API calls. In this paper, we first curate a fully verifiable format following dataset VFF. In contrast to existing works often adopting external LLMs for instruction-following validations, every sample of VFF can be easily validated with a Python function. Further, we propose to leverage this verifiable feature to synthesize massive data for progressively training small LLMs, in order to improve their format following abilities. Experimental results highlight the prevalent limitations in the format following capabilities of 7B level open-source LLMs and demonstrate the effectiveness of our method in enhancing this essential ability. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04498v1-abstract-full').style.display = 'none'; document.getElementById('2502.04498v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear at Findings of NAACL 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.03972">arXiv:2502.03972</a> <span> [<a href="https://arxiv.org/pdf/2502.03972">pdf</a>, <a href="https://arxiv.org/format/2502.03972">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Materials Science">cond-mat.mtrl-sci</span> </div> </div> <p class="title is-5 mathjax"> Triple-Q state in magnetic breathing kagome lattice </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Zhou%2C+H">Hangyu Zhou</a>, <a href="/search/?searchtype=author&query=Dias%2C+M+d+S">Manuel dos Santos Dias</a>, <a href="/search/?searchtype=author&query=Bao%2C+S">Shijian Bao</a>, <a href="/search/?searchtype=author&query=Lu%2C+H">Hanchen Lu</a>, <a href="/search/?searchtype=author&query=Zhang%2C+Y">Youguang Zhang</a>, <a href="/search/?searchtype=author&query=Zhao%2C+W">Weisheng Zhao</a>, <a href="/search/?searchtype=author&query=Lounis%2C+S">Samir Lounis</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.03972v1-abstract-short" style="display: inline;"> 
Magnetic frustration in two-dimensional spin lattices with triangular motifs underpins a series of exotic states, ranging from multi-Q configurations to disordered spin-glasses. The antiferromagnetic kagome lattice, characterized by its network of corner-sharing triangles, represents a paradigmatic frustrated system exhibiting macroscopic degeneracy. Expanding upon the kagomerization mechanism, we… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03972v1-abstract-full').style.display = 'inline'; document.getElementById('2502.03972v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.03972v1-abstract-full" style="display: none;"> Magnetic frustration in two-dimensional spin lattices with triangular motifs underpins a series of exotic states, ranging from multi-Q configurations to disordered spin-glasses. The antiferromagnetic kagome lattice, characterized by its network of corner-sharing triangles, represents a paradigmatic frustrated system exhibiting macroscopic degeneracy. Expanding upon the kagomerization mechanism, we focus on the magnetic breathing kagome lattice formed by a Mn monolayer deposited on a heavy metal substrate and capped with h-BN. The Mn kagome arrangement induces pronounced magnetic frustration, as evidenced by the nearly flat bands derived from spin spiral energy calculations. Including further-neighbor interactions reveals a spin spiral energy minimum along the $Γ$-K line and an intriguing triple-Q state with nonzero topological charge, potentially leading to highly nonlinear Hall effects. Furthermore, the flat band properties can further give rise to an even more complex spin configuration, marked by several Q-pockets in the spin structure factor. These results present a fertile ground for advancing the study of multi-Q states and exploring emergent topological phenomena. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03972v1-abstract-full').style.display = 'none'; document.getElementById('2502.03972v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">27 pages, 4 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.03828">arXiv:2502.03828</a> <span> [<a href="https://arxiv.org/pdf/2502.03828">pdf</a>, <a href="https://arxiv.org/ps/2502.03828">ps</a>, <a href="https://arxiv.org/format/2502.03828">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="High Energy Physics - Experiment">hep-ex</span> </div> </div> <p class="title is-5 mathjax"> Observation of $D^+\to \bar K_1(1270)^0μ^+ν_μ$ and $D^0\to K_1(1270)^-μ^+ν_μ$ </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=BESIII+Collaboration"> BESIII Collaboration</a>, <a href="/search/?searchtype=author&query=Ablikim%2C+M">M. Ablikim</a>, <a href="/search/?searchtype=author&query=Achasov%2C+M+N">M. N. Achasov</a>, <a href="/search/?searchtype=author&query=Adlarson%2C+P">P. Adlarson</a>, <a href="/search/?searchtype=author&query=Afedulidis%2C+O">O. Afedulidis</a>, <a href="/search/?searchtype=author&query=Ai%2C+X+C">X. C. Ai</a>, <a href="/search/?searchtype=author&query=Aliberti%2C+R">R. Aliberti</a>, <a href="/search/?searchtype=author&query=Amoroso%2C+A">A. Amoroso</a>, <a href="/search/?searchtype=author&query=An%2C+Q">Q. 
An</a>, <a href="/search/?searchtype=author&query=Bai%2C+Y">Y. Bai</a>, <a href="/search/?searchtype=author&query=Bakina%2C+O">O. Bakina</a>, <a href="/search/?searchtype=author&query=Balossino%2C+I">I. Balossino</a>, <a href="/search/?searchtype=author&query=Ban%2C+Y">Y. Ban</a>, <a href="/search/?searchtype=author&query=Bao%2C+H+-">H. -R. Bao</a>, <a href="/search/?searchtype=author&query=Batozskaya%2C+V">V. Batozskaya</a>, <a href="/search/?searchtype=author&query=Begzsuren%2C+K">K. Begzsuren</a>, <a href="/search/?searchtype=author&query=Berger%2C+N">N. Berger</a>, <a href="/search/?searchtype=author&query=Berlowski%2C+M">M. Berlowski</a>, <a href="/search/?searchtype=author&query=Bertani%2C+M">M. Bertani</a>, <a href="/search/?searchtype=author&query=Bettoni%2C+D">D. Bettoni</a>, <a href="/search/?searchtype=author&query=Bianchi%2C+F">F. Bianchi</a>, <a href="/search/?searchtype=author&query=Bianco%2C+E">E. Bianco</a>, <a href="/search/?searchtype=author&query=Bortone%2C+A">A. Bortone</a>, <a href="/search/?searchtype=author&query=Boyko%2C+I">I. Boyko</a>, <a href="/search/?searchtype=author&query=Briere%2C+R+A">R. A. Briere</a> , et al. (646 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.03828v1-abstract-short" style="display: inline;"> By analyzing 7.93 $\rm fb^{-1}$ of $e^+e^-$ collision data collected at the center-of-mass energy of 3.773 GeV with the BESIII detector operated at the BEPCII collider, we report the observation of the semimuonic decays of $D^+\to \bar K_1(1270)^0μ^+ν_μ$ and $D^0\to K_1(1270)^-μ^+ν_μ$ with statistical significances of $12.5σ$ and $6.0σ$, respectively. 
Their decay branching fractions are determined… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03828v1-abstract-full').style.display = 'inline'; document.getElementById('2502.03828v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.03828v1-abstract-full" style="display: none;"> By analyzing 7.93 $\rm fb^{-1}$ of $e^+e^-$ collision data collected at the center-of-mass energy of 3.773 GeV with the BESIII detector operated at the BEPCII collider, we report the observation of the semimuonic decays of $D^+\to \bar K_1(1270)^0μ^+ν_μ$ and $D^0\to K_1(1270)^-μ^+ν_μ$ with statistical significances of $12.5σ$ and $6.0σ$, respectively. Their decay branching fractions are determined to be ${\mathcal B}[D^{+}\to \bar{K}_1(1270)^0 μ^{+}ν_μ]=(2.36\pm0.20^{+0.18}_{-0.27}\pm 0.48)\times10^{-3}$ and ${\mathcal B}[D^{0}\to K_1(1270)^{-} μ^{+}ν_μ]=(0.78\pm0.11^{+0.05}_{-0.09}\pm 0.15)\times10^{-3}$, where the first and second uncertainties are statistical and systematic, respectively, and the third originates from the input branching fraction of $\bar K_{1}(1270)^0\to K^- π^+π^0$ or $K_1(1270)^-\to K^-π^+π^-$. Combining our branching fractions with the previous measurements of ${\mathcal B}[D^+\to \bar K_1(1270)^0e^+ν_{e}]$ and ${\mathcal B}[D^0\to K_1(1270)^-e^+ν_{e}]$, we determine the branching fraction ratios to be ${\mathcal B}[D^+\to \bar K_1(1270)^0μ^+ν_μ]/{\mathcal B}[D^+\to \bar K_1(1270)^0e^+ν_{e}]=1.03 \pm 0.14 \substack{+0.11\\-0.15}$ and ${\mathcal B}[D^0\to K_1(1270)^-μ^+ν_μ]/{\mathcal B}[D^0\to K_1(1270)^-e^+ν_{e}]=0.74\pm 0.13 \substack{+0.08\\-0.13}$. 
Using the branching fractions measured in this work and the world-average lifetimes of the $D^+$ and $D^0$ mesons, we determine the semimuonic partial decay width ratio to be $Γ[D^+\to \bar K_1(1270)^0 μ^+ν_μ]/Γ[D^0\to K_1(1270)^- μ^+ν_μ]=1.22\pm 0.10\substack{+0.06\\-0.09}$, which is consistent with unity as predicted by isospin conservation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03828v1-abstract-full').style.display = 'none'; document.getElementById('2502.03828v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 2 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.03782">arXiv:2502.03782</a> <span> [<a href="https://arxiv.org/pdf/2502.03782">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Instrumentation and Methods for Astrophysics">astro-ph.IM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Solar and Stellar Astrophysics">astro-ph.SR</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.3390/universe9010009">10.3390/universe9010009 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Classification of Solar Radio Spectrum Based on Swin Transformer </p> <p class="authors"> <span 
class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Chen%2C+J">Jian Chen</a>, <a href="/search/?searchtype=author&query=Yuan%2C+G">Guowu Yuan</a>, <a href="/search/?searchtype=author&query=Zhou%2C+H">Hao Zhou</a>, <a href="/search/?searchtype=author&query=Tan%2C+C">Chengming Tan</a>, <a href="/search/?searchtype=author&query=Yang%2C+L">Lei Yang</a>, <a href="/search/?searchtype=author&query=Li%2C+S">Siqi Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.03782v1-abstract-short" style="display: inline;"> Solar radio observation is a method used to study the Sun. It is very important for space weather early warning and solar physics research to automatically classify solar radio spectrums in real time and judge whether there is a solar radio burst. As the number of solar radio burst spectrums is small and uneven, this paper proposes a classification method for solar radio spectrums based on the Swi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03782v1-abstract-full').style.display = 'inline'; document.getElementById('2502.03782v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.03782v1-abstract-full" style="display: none;"> Solar radio observation is a method used to study the Sun. It is very important for space weather early warning and solar physics research to automatically classify solar radio spectrums in real time and judge whether there is a solar radio burst. As the number of solar radio burst spectrums is small and uneven, this paper proposes a classification method for solar radio spectrums based on the Swin transformer. First, the method transfers the parameters of the pretrained model to the Swin transformer model. 
Then, the hidden layer weights of the Swin transformer are frozen, and the fully connected layer of the Swin transformer is trained on the target dataset. Finally, parameter tuning is performed. The experimental results show that the method can achieve a true positive rate of 100%, which is more accurate than previous methods. Moreover, the number of our model parameters is only 20 million, which is 80% lower than that of the traditional VGG16 convolutional neural network with more than 130 million parameters. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03782v1-abstract-full').style.display = 'none'; document.getElementById('2502.03782v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.03778">arXiv:2502.03778</a> <span> [<a href="https://arxiv.org/pdf/2502.03778">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Instrumentation and Methods for Astrophysics">astro-ph.IM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Solar and Stellar Astrophysics">astro-ph.SR</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.3390/universe8120656">10.3390/universe8120656 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Self-Supervised Learning for Solar Radio Spectrum Classification </p> <p class="authors"> <span 
class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Li%2C+S">Siqi Li</a>, <a href="/search/?searchtype=author&query=Yuan%2C+G">Guowu Yuan</a>, <a href="/search/?searchtype=author&query=Chen%2C+J">Jian Chen</a>, <a href="/search/?searchtype=author&query=Tan%2C+C">Chengming Tan</a>, <a href="/search/?searchtype=author&query=Zhou%2C+H">Hao Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.03778v1-abstract-short" style="display: inline;"> Solar radio observation is an important way to study the Sun. Solar radio bursts contain important information about solar activity. Therefore, real-time automatic detection and classification of solar radio bursts are of great value for subsequent solar physics research and space weather warnings. Traditional image classification methods based on deep learning often require considerable training… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03778v1-abstract-full').style.display = 'inline'; document.getElementById('2502.03778v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.03778v1-abstract-full" style="display: none;"> Solar radio observation is an important way to study the Sun. Solar radio bursts contain important information about solar activity. Therefore, real-time automatic detection and classification of solar radio bursts are of great value for subsequent solar physics research and space weather warnings. Traditional image classification methods based on deep learning often require considerable training data. To address insufficient solar radio spectrum images, transfer learning is generally used. However, the large difference between natural images and solar spectrum images has a large impact on the transfer learning effect. 
In this paper, we propose a self-supervised learning method for solar radio spectrum classification. Our method uses self-supervised training with a self-masking approach in natural language processing. Self-supervised learning is more conducive to learning the essential information about images compared with supervised methods, and it is more suitable for transfer learning. First, the method pre-trains using a large amount of other existing data. Then, the trained model is fine-tuned on the solar radio spectrum dataset. Experiments show that the method achieves a classification accuracy similar to that of convolutional neural networks and Transformer networks with supervised training. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03778v1-abstract-full').style.display = 'none'; document.getElementById('2502.03778v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages, 8 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.03297">arXiv:2502.03297</a> <span> [<a href="https://arxiv.org/pdf/2502.03297">pdf</a>, <a href="https://arxiv.org/format/2502.03297">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> IRIS: An Immersive Robot Interaction System </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Jiang%2C+X">Xinkai Jiang</a>, <a href="/search/?searchtype=author&query=Yuan%2C+Q">Qihao Yuan</a>, <a href="/search/?searchtype=author&query=Dincer%2C+E+U">Enes Ulas Dincer</a>, <a href="/search/?searchtype=author&query=Zhou%2C+H">Hongyi Zhou</a>, <a href="/search/?searchtype=author&query=Li%2C+G">Ge Li</a>, <a href="/search/?searchtype=author&query=Li%2C+X">Xueyin Li</a>, <a href="/search/?searchtype=author&query=Haag%2C+J">Julius Haag</a>, <a href="/search/?searchtype=author&query=Schreiber%2C+N">Nicolas Schreiber</a>, <a href="/search/?searchtype=author&query=Li%2C+K">Kailai Li</a>, <a href="/search/?searchtype=author&query=Neumann%2C+G">Gerhard Neumann</a>, <a href="/search/?searchtype=author&query=Lioutikov%2C+R">Rudolf Lioutikov</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.03297v2-abstract-short" style="display: inline;"> This paper introduces IRIS, an immersive Robot Interaction System leveraging Extended Reality (XR), designed for robot 
data collection and interaction across multiple simulators, benchmarks, and real-world scenarios. While existing XR-based data collection systems provide efficient and intuitive solutions for large-scale data collection, they are often challenging to reproduce and reuse. This limi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03297v2-abstract-full').style.display = 'inline'; document.getElementById('2502.03297v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.03297v2-abstract-full" style="display: none;"> This paper introduces IRIS, an immersive Robot Interaction System leveraging Extended Reality (XR), designed for robot data collection and interaction across multiple simulators, benchmarks, and real-world scenarios. While existing XR-based data collection systems provide efficient and intuitive solutions for large-scale data collection, they are often challenging to reproduce and reuse. This limitation arises because current systems are highly tailored to simulator-specific use cases and environments. IRIS is a novel, easily extendable framework that already supports multiple simulators, benchmarks, and even headsets. Furthermore, IRIS is able to include additional information from real-world sensors, such as point clouds captured through depth cameras. A unified scene specification is generated directly from simulators or real-world sensors and transmitted to XR headsets, creating identical scenes in XR. This specification allows IRIS to support any of the objects, assets, and robots provided by the simulators. In addition, IRIS introduces shared spatial anchors and a robust communication protocol that links simulations between multiple XR headsets. This feature enables multiple XR headsets to share a synchronized scene, facilitating collaborative and multi-user data collection. 
IRIS can be deployed on any device that supports the Unity Framework, encompassing the vast majority of commercially available headsets. In this work, IRIS was deployed and tested on the Meta Quest 3 and the HoloLens 2. IRIS showcased its versatility across a wide range of real-world and simulated scenarios, using current popular robot simulators such as MuJoCo, IsaacSim, CoppeliaSim, and Genesis. In addition, a user study evaluates IRIS on a data collection task for the LIBERO benchmark. The study shows that IRIS significantly outperforms the baseline in both objective and subjective metrics. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03297v2-abstract-full').style.display = 'none'; document.getElementById('2502.03297v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 5 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.02950">arXiv:2502.02950</a> <span> [<a href="https://arxiv.org/pdf/2502.02950">pdf</a>, <a href="https://arxiv.org/format/2502.02950">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> Fine-grained Preference Optimization Improves Zero-shot Text-to-Speech </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Yao%2C+J">Jixun Yao</a>, <a href="/search/?searchtype=author&query=Yang%2C+Y">Yuguang Yang</a>, <a href="/search/?searchtype=author&query=Pan%2C+Y">Yu Pan</a>, <a href="/search/?searchtype=author&query=Feng%2C+Y">Yuan Feng</a>, <a href="/search/?searchtype=author&query=Ning%2C+Z">Ziqian Ning</a>, <a href="/search/?searchtype=author&query=Ye%2C+J">Jianhao Ye</a>, <a href="/search/?searchtype=author&query=Zhou%2C+H">Hongbin Zhou</a>, <a href="/search/?searchtype=author&query=Xie%2C+L">Lei Xie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.02950v1-abstract-short" style="display: inline;"> Integrating human feedback to align text-to-speech (TTS) system outputs with human preferences has proven to be an effective approach for enhancing the robustness of language model-based TTS systems. Current approaches primarily focus on using preference data annotated at the utterance level. 
However, frequent issues that affect the listening experience often only arise in specific segments of aud… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02950v1-abstract-full').style.display = 'inline'; document.getElementById('2502.02950v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.02950v1-abstract-full" style="display: none;"> Integrating human feedback to align text-to-speech (TTS) system outputs with human preferences has proven to be an effective approach for enhancing the robustness of language model-based TTS systems. Current approaches primarily focus on using preference data annotated at the utterance level. However, frequent issues that affect the listening experience often only arise in specific segments of audio samples, while other segments are well-generated. In this study, we propose a fine-grained preference optimization approach (FPO) to enhance the robustness of TTS systems. FPO focuses on addressing localized issues in generated samples rather than uniformly optimizing the entire utterance. Specifically, we first analyze the types of issues in generated samples, categorize them into two groups, and propose a selective training loss strategy to optimize preferences based on fine-grained labels for each issue type. Experimental results show that FPO enhances the robustness of zero-shot TTS systems by effectively addressing local issues, significantly reducing the bad case ratio, and improving intelligibility. Furthermore, FPO exhibits superior data efficiency compared with baseline systems, achieving similar performance with fewer training samples. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02950v1-abstract-full').style.display = 'none'; document.getElementById('2502.02950v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">WIP</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.02850">arXiv:2502.02850</a> <span> [<a href="https://arxiv.org/pdf/2502.02850">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.3390/app12178707">10.3390/app12178707 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> RS-YOLOX: A High Precision Detector for Object Detection in Satellite Remote Sensing Images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Yang%2C+L">Lei Yang</a>, <a href="/search/?searchtype=author&query=Yuan%2C+G">Guowu Yuan</a>, <a href="/search/?searchtype=author&query=Zhou%2C+H">Hao Zhou</a>, <a href="/search/?searchtype=author&query=Liu%2C+H">Hongyu Liu</a>, <a href="/search/?searchtype=author&query=Chen%2C+J">Jian Chen</a>, <a href="/search/?searchtype=author&query=Wu%2C+H">Hao Wu</a> </p> <p class="abstract mathjax"> <span 
class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.02850v1-abstract-short" style="display: inline;"> Automatic object detection by satellite remote sensing images is of great significance for resource exploration and natural disaster assessment. To solve existing problems in remote sensing image detection, this article proposes an improved YOLOX model for satellite remote sensing image automatic detection. This model is named RS-YOLOX. To strengthen the feature learning ability of the network, we… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02850v1-abstract-full').style.display = 'inline'; document.getElementById('2502.02850v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.02850v1-abstract-full" style="display: none;"> Automatic object detection by satellite remote sensing images is of great significance for resource exploration and natural disaster assessment. To solve existing problems in remote sensing image detection, this article proposes an improved YOLOX model for satellite remote sensing image automatic detection. This model is named RS-YOLOX. To strengthen the feature learning ability of the network, we used Efficient Channel Attention (ECA) in the backbone network of YOLOX and combined the Adaptively Spatial Feature Fusion (ASFF) with the neck network of YOLOX. To balance the numbers of positive and negative samples in training, we used the Varifocal Loss function. Finally, to obtain a high-performance remote sensing object detector, we combined the trained model with an open-source framework called Slicing Aided Hyper Inference (SAHI). This work evaluated models on three aerial remote sensing datasets (DOTA-v1.5, TGRS-HRRSD, and RSOD). 
Our comparative experiments demonstrate that our model has the highest accuracy in detecting objects in remote sensing image datasets. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02850v1-abstract-full').style.display = 'none'; document.getElementById('2502.02850v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.02533">arXiv:2502.02533</a> <span> [<a href="https://arxiv.org/pdf/2502.02533">pdf</a>, <a href="https://arxiv.org/format/2502.02533">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> </div> </div> <p class="title is-5 mathjax"> Multi-Agent Design: Optimizing Agents with Better Prompts and Topologies </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Zhou%2C+H">Han Zhou</a>, <a href="/search/?searchtype=author&query=Wan%2C+X">Xingchen Wan</a>, <a href="/search/?searchtype=author&query=Sun%2C+R">Ruoxi Sun</a>, <a href="/search/?searchtype=author&query=Palangi%2C+H">Hamid Palangi</a>, <a href="/search/?searchtype=author&query=Iqbal%2C+S">Shariq Iqbal</a>, <a href="/search/?searchtype=author&query=Vuli%C4%87%2C+I">Ivan Vulić</a>, <a 
href="/search/?searchtype=author&query=Korhonen%2C+A">Anna Korhonen</a>, <a href="/search/?searchtype=author&query=Ar%C4%B1k%2C+S+%C3%96">Sercan Ö. Arık</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.02533v1-abstract-short" style="display: inline;"> Large language models, employed as multiple agents that interact and collaborate with each other, have excelled at solving complex tasks. The agents are programmed with prompts that declare their functionality, along with the topologies that orchestrate interactions across agents. Designing prompts and topologies for multi-agent systems (MAS) is inherently complex. To automate the entire design pr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02533v1-abstract-full').style.display = 'inline'; document.getElementById('2502.02533v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.02533v1-abstract-full" style="display: none;"> Large language models, employed as multiple agents that interact and collaborate with each other, have excelled at solving complex tasks. The agents are programmed with prompts that declare their functionality, along with the topologies that orchestrate interactions across agents. Designing prompts and topologies for multi-agent systems (MAS) is inherently complex. To automate the entire design process, we first conduct an in-depth analysis of the design space aiming to understand the factors behind building effective MAS. We reveal that prompts together with topologies play critical roles in enabling more effective MAS design. 
Based on the insights, we propose Multi-Agent System Search (MASS), a MAS optimization framework that efficiently exploits the complex MAS design space by interleaving its optimization stages, from local to global, from prompts to topologies, over three stages: 1) block-level (local) prompt optimization; 2) workflow topology optimization; 3) workflow-level (global) prompt optimization, where each stage is conditioned on the iteratively optimized prompts/topologies from former stages. We show that MASS-optimized multi-agent systems outperform a spectrum of existing alternatives by a substantial margin. Based on the MASS-found systems, we finally propose design principles behind building effective multi-agent systems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02533v1-abstract-full').style.display = 'none'; document.getElementById('2502.02533v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 pages, 7 figures, 1 table (30 pages, 9 figures, 5 tables including references and appendices)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.02414">arXiv:2502.02414</a> <span> [<a href="https://arxiv.org/pdf/2502.02414">pdf</a>, <a href="https://arxiv.org/format/2502.02414">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Transolver++: An Accurate Neural Solver for PDEs on Million-Scale Geometries </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Luo%2C+H">Huakun Luo</a>, <a href="/search/?searchtype=author&query=Wu%2C+H">Haixu Wu</a>, <a href="/search/?searchtype=author&query=Zhou%2C+H">Hang Zhou</a>, <a href="/search/?searchtype=author&query=Xing%2C+L">Lanxiang Xing</a>, <a href="/search/?searchtype=author&query=Di%2C+Y">Yichen Di</a>, <a href="/search/?searchtype=author&query=Wang%2C+J">Jianmin Wang</a>, <a href="/search/?searchtype=author&query=Long%2C+M">Mingsheng Long</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.02414v2-abstract-short" style="display: inline;"> Although deep models have been widely explored in solving partial differential equations (PDEs), previous works are primarily limited to data only with up to tens of thousands of mesh points, far from the million-point scale required by industrial simulations that involve complex geometries. 
In the spirit of advancing neural PDE solvers to real industrial applications, we present Transolver++, a h… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02414v2-abstract-full').style.display = 'inline'; document.getElementById('2502.02414v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.02414v2-abstract-full" style="display: none;"> Although deep models have been widely explored in solving partial differential equations (PDEs), previous works are primarily limited to data only with up to tens of thousands of mesh points, far from the million-point scale required by industrial simulations that involve complex geometries. In the spirit of advancing neural PDE solvers to real industrial applications, we present Transolver++, a highly parallel and efficient neural solver that can accurately solve PDEs on million-scale geometries. Building upon previous advancements in solving PDEs by learning physical states via Transolver, Transolver++ is further equipped with an extremely optimized parallelism framework and a local adaptive mechanism to efficiently capture eidetic physical states from massive mesh points, successfully tackling the thorny challenges in computation and physics learning when scaling up input mesh size. Transolver++ increases the single-GPU input capacity to million-scale points for the first time and is capable of continuously scaling input size in linear complexity by increasing GPUs. Experimentally, Transolver++ yields 13% relative promotion across six standard PDE benchmarks and achieves over 20% performance gain in million-scale high-fidelity industrial simulations, whose sizes are 100$\times$ larger than previous benchmarks, covering car and 3D aircraft designs. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02414v2-abstract-full').style.display = 'none'; document.getElementById('2502.02414v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.02063">arXiv:2502.02063</a> <span> [<a href="https://arxiv.org/pdf/2502.02063">pdf</a>, <a href="https://arxiv.org/format/2502.02063">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> </div> </div> <p class="title is-5 mathjax"> CASIM: Composite Aware Semantic Injection for Text to Motion Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Chang%2C+C">Che-Jui Chang</a>, <a href="/search/?searchtype=author&query=Liu%2C+Q+T">Qingze Tony Liu</a>, <a href="/search/?searchtype=author&query=Zhou%2C+H">Honglu Zhou</a>, <a href="/search/?searchtype=author&query=Pavlovic%2C+V">Vladimir Pavlovic</a>, <a href="/search/?searchtype=author&query=Kapadia%2C+M">Mubbasir Kapadia</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.02063v1-abstract-short" style="display: inline;"> 
Recent advances in generative modeling and tokenization have driven significant progress in text-to-motion generation, leading to enhanced quality and realism in generated motions. However, effectively leveraging textual information for conditional motion generation remains an open challenge. We observe that current approaches, primarily relying on fixed-length text embeddings (e.g., CLIP) for glo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02063v1-abstract-full').style.display = 'inline'; document.getElementById('2502.02063v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.02063v1-abstract-full" style="display: none;"> Recent advances in generative modeling and tokenization have driven significant progress in text-to-motion generation, leading to enhanced quality and realism in generated motions. However, effectively leveraging textual information for conditional motion generation remains an open challenge. We observe that current approaches, primarily relying on fixed-length text embeddings (e.g., CLIP) for global semantic injection, struggle to capture the composite nature of human motion, resulting in suboptimal motion quality and controllability. To address this limitation, we propose the Composite Aware Semantic Injection Mechanism (CASIM), comprising a composite-aware semantic encoder and a text-motion aligner that learns the dynamic correspondence between text and motion tokens. Notably, CASIM is model and representation-agnostic, readily integrating with both autoregressive and diffusion-based methods. Experiments on HumanML3D and KIT benchmarks demonstrate that CASIM consistently improves motion quality, text-motion alignment, and retrieval scores across state-of-the-art methods. 
Qualitative analyses further highlight the superiority of our composite-aware approach over fixed-length semantic injection, enabling precise motion control from text prompts and stronger generalization to unseen text inputs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02063v1-abstract-full').style.display = 'none'; document.getElementById('2502.02063v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.02016">arXiv:2502.02016</a> <span> [<a href="https://arxiv.org/pdf/2502.02016">pdf</a>, <a href="https://arxiv.org/format/2502.02016">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> A Periodic Bayesian Flow for Material Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Wu%2C+H">Hanlin Wu</a>, <a href="/search/?searchtype=author&query=Song%2C+Y">Yuxuan Song</a>, <a href="/search/?searchtype=author&query=Gong%2C+J">Jingjing Gong</a>, <a href="/search/?searchtype=author&query=Cao%2C+Z">Ziyao Cao</a>, <a href="/search/?searchtype=author&query=Ouyang%2C+Y">Yawen Ouyang</a>, <a href="/search/?searchtype=author&query=Zhang%2C+J">Jianbing Zhang</a>, <a href="/search/?searchtype=author&query=Zhou%2C+H">Hao Zhou</a>, <a href="/search/?searchtype=author&query=Ma%2C+W">Wei-Ying Ma</a>, <a 
href="/search/?searchtype=author&query=Liu%2C+J">Jingjing Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.02016v1-abstract-short" style="display: inline;"> Generative modeling of crystal data distribution is an important yet challenging task due to the unique periodic physical symmetry of crystals. Diffusion-based methods have shown early promise in modeling crystal distribution. More recently, Bayesian Flow Networks were introduced to aggregate noisy latent variables, resulting in a variance-reduced parameter space that has been shown to be advantag… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02016v1-abstract-full').style.display = 'inline'; document.getElementById('2502.02016v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.02016v1-abstract-full" style="display: none;"> Generative modeling of crystal data distribution is an important yet challenging task due to the unique periodic physical symmetry of crystals. Diffusion-based methods have shown early promise in modeling crystal distribution. More recently, Bayesian Flow Networks were introduced to aggregate noisy latent variables, resulting in a variance-reduced parameter space that has been shown to be advantageous for modeling Euclidean data distributions with structural constraints (Song et al., 2023). Inspired by this, we seek to unlock its potential for modeling variables located in non-Euclidean manifolds e.g. those within crystal structures, by overcoming challenging theoretical issues. We introduce CrysBFN, a novel crystal generation method by proposing a periodic Bayesian flow, which essentially differs from the original Gaussian-based BFN by exhibiting non-monotonic entropy dynamics. 
To successfully realize the concept of periodic Bayesian flow, CrysBFN integrates a new entropy conditioning mechanism and empirically demonstrates its significance compared to time-conditioning. Extensive experiments over both crystal ab initio generation and crystal structure prediction tasks demonstrate the superiority of CrysBFN, which consistently achieves new state-of-the-art on all benchmarks. Surprisingly, we found that CrysBFN enjoys a significant improvement in sampling efficiency, e.g., ~100x speedup (10 vs. 2000 steps network forwards) compared with previous diffusion-based methods on MP-20 dataset. Code is available at https://github.com/wu-han-lin/CrysBFN. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02016v1-abstract-full').style.display = 'none'; document.getElementById('2502.02016v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to ICLR25</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Zhou%2C+H&start=50" class="pagination-next">Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Zhou%2C+H&start=0" class="pagination-link is-current" aria-label="Page 1" aria-current="page">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Zhou%2C+H&start=50" class="pagination-link" aria-label="Page 2">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Zhou%2C+H&start=100" class="pagination-link" aria-label="Page 3">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Zhou%2C+H&start=150" class="pagination-link" aria-label="Page 4">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Zhou%2C+H&start=200" class="pagination-link" aria-label="Page 5">5 </a> </li> <li><span class="pagination-ellipsis">…</span></li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" 
role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 
0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 
21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>