<!-- Scrape artifacts captured before the doctype, preserved as a comment so the
     document begins cleanly at <!DOCTYPE html> (bare text before the doctype
     would trigger quirks-mode handling):
     CINXE.COM
     Search | arXiv e-print repository
-->
<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script 
src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. 
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" 
role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;50 of 1,153 results for author: <span class="mathjax">Cai, Z</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> <div class="content"> <form method="GET" action="/search/" aria-role="search"> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Cai, Z"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label 
class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Cai%2C+Z&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Cai, Z"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option 
value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Cai%2C+Z&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Cai%2C+Z&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Cai%2C+Z&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Cai%2C+Z&amp;start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Cai%2C+Z&amp;start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Cai%2C+Z&amp;start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">&hellip;</span></li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li 
class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2503.01535">arXiv:2503.01535</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2503.01535">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Strongly Correlated Electrons">cond-mat.str-el</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Materials Science">cond-mat.mtrl-sci</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Physics">physics.comp-ph</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1103/PhysRevLett.134.086202">10.1103/PhysRevLett.134.086202 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Flat bands and temperature-driven phase transition in quasi-one-dimensional zigzag chains </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Gao%2C+J">Jisong Gao</a>, <a href="/search/?searchtype=author&amp;query=Cao%2C+H">Haijun Cao</a>, <a href="/search/?searchtype=author&amp;query=Hu%2C+X">Xuegao Hu</a>, <a href="/search/?searchtype=author&amp;query=Zhou%2C+H">Hui Zhou</a>, <a href="/search/?searchtype=author&amp;query=Cai%2C+Z">Zhihao Cai</a>, <a href="/search/?searchtype=author&amp;query=Zhao%2C+Q">Qiaoxiao Zhao</a>, <a href="/search/?searchtype=author&amp;query=Li%2C+D">Dong Li</a>, <a href="/search/?searchtype=author&amp;query=Gao%2C+Z">Zhicheng Gao</a>, <a href="/search/?searchtype=author&amp;query=Ideta%2C+S">Shin-ichiro Ideta</a>, <a href="/search/?searchtype=author&amp;query=Shimada%2C+K">Kenya Shimada</a>, <a href="/search/?searchtype=author&amp;query=Cheng%2C+P">Peng Cheng</a>, <a 
href="/search/?searchtype=author&amp;query=Chen%2C+L">Lan Chen</a>, <a href="/search/?searchtype=author&amp;query=Wu%2C+K">Kehui Wu</a>, <a href="/search/?searchtype=author&amp;query=Meng%2C+S">Sheng Meng</a>, <a href="/search/?searchtype=author&amp;query=Feng%2C+B">Baojie Feng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2503.01535v1-abstract-short" style="display: inline;"> Flat-band materials have garnered extensive attention due to their captivating properties associated with strong correlation effects. While flat bands have been discovered in several types of 2D materials, their existence in 1D systems remains elusive. Here, we propose a 1D frustrated lattice, specifically the 1D zigzag lattice, as a platform for hosting flat bands. This lattice can be experimenta&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.01535v1-abstract-full').style.display = 'inline'; document.getElementById('2503.01535v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2503.01535v1-abstract-full" style="display: none;"> Flat-band materials have garnered extensive attention due to their captivating properties associated with strong correlation effects. While flat bands have been discovered in several types of 2D materials, their existence in 1D systems remains elusive. Here, we propose a 1D frustrated lattice, specifically the 1D zigzag lattice, as a platform for hosting flat bands. This lattice can be experimentally realized by growing CuTe chains on Cu(111). The presence of flat bands was confirmed by tight-binding model analysis, first-principles calculations, and angle-resolved photoemission spectroscopy measurements. In addition, we discovered a temperature-driven phase transition at approximately 250 K. 
Detailed analyses demonstrate that the system has a Tomonaga-Luttinger liquid behavior, accompanied by spin-charge separation effects. Our work unveils new prospects for investigating strongly correlated electron behaviors and topological properties in the 1D limit. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.01535v1-abstract-full').style.display = 'none'; document.getElementById('2503.01535v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Physical Review Letters 134, 086202 (2025) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2503.00968">arXiv:2503.00968</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2503.00968">pdf</a>, <a href="https://arxiv.org/format/2503.00968">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Instrumentation and Detectors">physics.ins-det</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="High Energy Physics - Experiment">hep-ex</span> </div> </div> <p class="title is-5 mathjax"> Simulation of the Background from $^{13}$C$(α, n)^{16}$O Reaction in the JUNO Scintillator </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=JUNO+Collaboration"> JUNO Collaboration</a>, <a href="/search/?searchtype=author&amp;query=Adam%2C+T">Thomas Adam</a>, <a href="/search/?searchtype=author&amp;query=Adamowicz%2C+K">Kai Adamowicz</a>, <a href="/search/?searchtype=author&amp;query=Ahmad%2C+S">Shakeel Ahmad</a>, <a
href="/search/?searchtype=author&amp;query=Ahmed%2C+R">Rizwan Ahmed</a>, <a href="/search/?searchtype=author&amp;query=Aiello%2C+S">Sebastiano Aiello</a>, <a href="/search/?searchtype=author&amp;query=An%2C+F">Fengpeng An</a>, <a href="/search/?searchtype=author&amp;query=Andreopoulos%2C+C">Costas Andreopoulos</a>, <a href="/search/?searchtype=author&amp;query=Andronico%2C+G">Giuseppe Andronico</a>, <a href="/search/?searchtype=author&amp;query=Anfimov%2C+N">Nikolay Anfimov</a>, <a href="/search/?searchtype=author&amp;query=Antonelli%2C+V">Vito Antonelli</a>, <a href="/search/?searchtype=author&amp;query=Antoshkina%2C+T">Tatiana Antoshkina</a>, <a href="/search/?searchtype=author&amp;query=de+Andr%C3%A9%2C+J+P+A+M">João Pedro Athayde Marcondes de André</a>, <a href="/search/?searchtype=author&amp;query=Auguste%2C+D">Didier Auguste</a>, <a href="/search/?searchtype=author&amp;query=Bai%2C+W">Weidong Bai</a>, <a href="/search/?searchtype=author&amp;query=Balashov%2C+N">Nikita Balashov</a>, <a href="/search/?searchtype=author&amp;query=Barresi%2C+A">Andrea Barresi</a>, <a href="/search/?searchtype=author&amp;query=Basilico%2C+D">Davide Basilico</a>, <a href="/search/?searchtype=author&amp;query=Baussan%2C+E">Eric Baussan</a>, <a href="/search/?searchtype=author&amp;query=Beretta%2C+M">Marco Beretta</a>, <a href="/search/?searchtype=author&amp;query=Bergnoli%2C+A">Antonio Bergnoli</a>, <a href="/search/?searchtype=author&amp;query=Bessonov%2C+N">Nikita Bessonov</a>, <a href="/search/?searchtype=author&amp;query=Bick%2C+D">Daniel Bick</a>, <a href="/search/?searchtype=author&amp;query=Bieger%2C+L">Lukas Bieger</a>, <a href="/search/?searchtype=author&amp;query=Biktemerova%2C+S">Svetlana Biktemerova</a> , et al. 
(608 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2503.00968v1-abstract-short" style="display: inline;"> Large-scale organic liquid scintillator detectors are highly efficient in the detection of MeV-scale electron antineutrinos. These signal events can be detected through inverse beta decay on protons, which produce a positron accompanied by a neutron. A noteworthy background for antineutrinos coming from nuclear power reactors and from the depths of the Earth (geoneutrinos) is generated by ($α, n$)&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.00968v1-abstract-full').style.display = 'inline'; document.getElementById('2503.00968v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2503.00968v1-abstract-full" style="display: none;"> Large-scale organic liquid scintillator detectors are highly efficient in the detection of MeV-scale electron antineutrinos. These signal events can be detected through inverse beta decay on protons, which produce a positron accompanied by a neutron. A noteworthy background for antineutrinos coming from nuclear power reactors and from the depths of the Earth (geoneutrinos) is generated by ($α, n$) reactions. In organic liquid scintillator detectors, $α$ particles emitted from intrinsic contaminants such as $^{238}$U, $^{232}$Th, and $^{210}$Pb/$^{210}$Po, can be captured on $^{13}$C nuclei, followed by the emission of a MeV-scale neutron. Three distinct interaction mechanisms can produce prompt energy depositions preceding the delayed neutron capture, leading to a pair of events correlated in space and time within the detector. 
Thus, ($α, n$) reactions represent an indistinguishable background in liquid scintillator-based antineutrino detectors, where their expected rate and energy spectrum are typically evaluated via Monte Carlo simulations. This work presents results from the open-source SaG4n software, used to calculate the expected energy depositions from the neutron and any associated de-excitation products. Also simulated is a detailed detector response to these interactions, using a dedicated Geant4-based simulation software from the JUNO experiment. An expected measurable $^{13}$C$(α, n)^{16}$O event rate and reconstructed prompt energy spectrum with associated uncertainties, are presented in the context of JUNO, however, the methods and results are applicable and relevant to other organic liquid scintillator neutrino detectors. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.00968v1-abstract-full').style.display = 'none'; document.getElementById('2503.00968v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">24 pages, 14 figures, 4 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.20846">arXiv:2502.20846</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.20846">pdf</a>, <a href="https://arxiv.org/format/2502.20846">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Performance">cs.PF</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> AARC: Automated Affinity-aware Resource Configuration for Serverless Workflows </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Jin%2C+L">Lingxiao Jin</a>, <a href="/search/?searchtype=author&amp;query=Cai%2C+Z">Zinuo Cai</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+Z">Zebin Chen</a>, <a href="/search/?searchtype=author&amp;query=Zhao%2C+H">Hongyu Zhao</a>, <a href="/search/?searchtype=author&amp;query=Ma%2C+R">Ruhui Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.20846v1-abstract-short" style="display: inline;"> Serverless computing is increasingly adopted for its ability to manage complex, event-driven workloads without the need for infrastructure provisioning. However, traditional resource allocation in serverless platforms couples CPU and memory, which may not be optimal for all functions. 
Existing decoupling approaches, while offering some flexibility, are not designed to handle the vast configuration&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.20846v1-abstract-full').style.display = 'inline'; document.getElementById('2502.20846v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.20846v1-abstract-full" style="display: none;"> Serverless computing is increasingly adopted for its ability to manage complex, event-driven workloads without the need for infrastructure provisioning. However, traditional resource allocation in serverless platforms couples CPU and memory, which may not be optimal for all functions. Existing decoupling approaches, while offering some flexibility, are not designed to handle the vast configuration space and complexity of serverless workflows. In this paper, we propose AARC, an innovative, automated framework that decouples CPU and memory resources to provide more flexible and efficient provisioning for serverless workloads. AARC is composed of two key components: Graph-Centric Scheduler, which identifies critical paths in workflows, and Priority Configurator, which applies priority scheduling techniques to optimize resource allocation. Our experimental evaluation demonstrates that AARC achieves substantial improvements over state-of-the-art methods, with total search time reductions of 85.8% and 89.6%, and cost savings of 49.6% and 61.7%, respectively, while maintaining SLO compliance. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.20846v1-abstract-full').style.display = 'none'; document.getElementById('2502.20846v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by the 62nd Design Automation Conference (DAC 2025)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.20422">arXiv:2502.20422</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.20422">pdf</a>, <a href="https://arxiv.org/format/2502.20422">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> SEKI: Self-Evolution and Knowledge Inspiration based Neural Architecture Search via Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Cai%2C+Z">Zicheng Cai</a>, <a href="/search/?searchtype=author&amp;query=Tang%2C+Y">Yaohua Tang</a>, <a href="/search/?searchtype=author&amp;query=Lai%2C+Y">Yutao Lai</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+H">Hua Wang</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+Z">Zhi Chen</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+H">Hao Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.20422v1-abstract-short" style="display: inline;"> We introduce SEKI, a novel large language model (LLM)-based neural architecture search (NAS) method. Inspired by the chain-of-thought (CoT) paradigm in modern LLMs, SEKI operates in two key stages: self-evolution and knowledge distillation. In the self-evolution stage, LLMs initially lack sufficient reference examples, so we implement an iterative refinement mechanism that enhances architectures b&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.20422v1-abstract-full').style.display = 'inline'; document.getElementById('2502.20422v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.20422v1-abstract-full" style="display: none;"> We introduce SEKI, a novel large language model (LLM)-based neural architecture search (NAS) method. Inspired by the chain-of-thought (CoT) paradigm in modern LLMs, SEKI operates in two key stages: self-evolution and knowledge distillation. In the self-evolution stage, LLMs initially lack sufficient reference examples, so we implement an iterative refinement mechanism that enhances architectures based on performance feedback. Over time, this process accumulates a repository of high-performance architectures. In the knowledge distillation stage, LLMs analyze common patterns among these architectures to generate new, optimized designs. Combining these two stages, SEKI greatly leverages the capacity of LLMs on NAS and without requiring any domain-specific data. Experimental results show that SEKI achieves state-of-the-art (SOTA) performance across various datasets and search spaces while requiring only 0.05 GPU-days, outperforming existing methods in both efficiency and accuracy. 
Furthermore, SEKI demonstrates strong generalization capabilities, achieving SOTA-competitive results across multiple tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.20422v1-abstract-full').style.display = 'none'; document.getElementById('2502.20422v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.18911">arXiv:2502.18911</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.18911">pdf</a>, <a href="https://arxiv.org/format/2502.18911">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Mesoscale and Nanoscale Physics">cond-mat.mes-hall</span> </div> </div> <p class="title is-5 mathjax"> Experimental Observation of Topological Disclination States in Lossy Electric Circuits </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Liu%2C+J">Jin Liu</a>, <a href="/search/?searchtype=author&amp;query=Jin%2C+W">Wei-Wu Jin</a>, <a href="/search/?searchtype=author&amp;query=Cai%2C+Z">Zhao-Fan Cai</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+X">Xin Wang</a>, <a href="/search/?searchtype=author&amp;query=Zhang%2C+Y">Yu-Ran Zhang</a>, <a href="/search/?searchtype=author&amp;query=Wei%2C+X">Xiaomin Wei</a>, <a href="/search/?searchtype=author&amp;query=Ju%2C+W">Wenbo Ju</a>, <a href="/search/?searchtype=author&amp;query=Yang%2C+Z">Zhongmin Yang</a>, <a href="/search/?searchtype=author&amp;query=Liu%2C+T">Tao Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.18911v1-abstract-short" style="display: inline;"> Topological phase transitions can be remarkably induced purely by manipulating gain and loss mechanisms, offering a novel approach to engineering topological properties. Recent theoretical studies have revealed gain-loss-induced topological disclination states, along with the associated fractional charge trapped at the disclination sites. Here, we present the experimental demonstration of topologi&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.18911v1-abstract-full').style.display = 'inline'; document.getElementById('2502.18911v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.18911v1-abstract-full" style="display: none;"> Topological phase transitions can be remarkably induced purely by manipulating gain and loss mechanisms, offering a novel approach to engineering topological properties. Recent theoretical studies have revealed gain-loss-induced topological disclination states, along with the associated fractional charge trapped at the disclination sites. Here, we present the experimental demonstration of topological disclination states in a purely lossy electric circuit. By designing alternating lossy electric circuit networks that correspond to the disclination lattice, we observe a voltage response localized at the disclination sites and demonstrate the robustness of these states against disorder. Furthermore, we measure the charge distribution, confirming the presence of fractional charge at the disclination sites, which gives rise to the topological disclination states. Our experiment provides direct evidence of gain-loss-induced topological disclination states in electric circuits, opening new possibilities for applications in classical systems. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.18911v1-abstract-full').style.display = 'none'; document.getElementById('2502.18911v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">7 pages, 5 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.18072">arXiv:2502.18072</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.18072">pdf</a>, <a href="https://arxiv.org/format/2502.18072">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> </div> </div> <p class="title is-5 mathjax"> MRBTP: Efficient Multi-Robot Behavior Tree Planning and Collaboration </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Cai%2C+Y">Yishuai Cai</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+X">Xinglin Chen</a>, <a href="/search/?searchtype=author&amp;query=Cai%2C+Z">Zhongxuan Cai</a>, <a href="/search/?searchtype=author&amp;query=Mao%2C+Y">Yunxin Mao</a>, <a href="/search/?searchtype=author&amp;query=Li%2C+M">Minglong Li</a>, <a href="/search/?searchtype=author&amp;query=Yang%2C+W">Wenjing Yang</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+J">Ji Wang</a> </p> <p 
class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.18072v1-abstract-short" style="display: inline;"> Multi-robot task planning and collaboration are critical challenges in robotics. While Behavior Trees (BTs) have been established as a popular control architecture and are plannable for a single robot, the development of effective multi-robot BT planning algorithms remains challenging due to the complexity of coordinating diverse action spaces. We propose the Multi-Robot Behavior Tree Planning (MR&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.18072v1-abstract-full').style.display = 'inline'; document.getElementById('2502.18072v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.18072v1-abstract-full" style="display: none;"> Multi-robot task planning and collaboration are critical challenges in robotics. While Behavior Trees (BTs) have been established as a popular control architecture and are plannable for a single robot, the development of effective multi-robot BT planning algorithms remains challenging due to the complexity of coordinating diverse action spaces. We propose the Multi-Robot Behavior Tree Planning (MRBTP) algorithm, with theoretical guarantees of both soundness and completeness. MRBTP features cross-tree expansion to coordinate heterogeneous actions across different BTs to achieve the team&#39;s goal. For homogeneous actions, we retain backup structures among BTs to ensure robustness and prevent redundant execution through intention sharing. While MRBTP is capable of generating BTs for both homogeneous and heterogeneous robot teams, its efficiency can be further improved. 
We then propose an optional plugin for MRBTP when Large Language Models (LLMs) are available to reason goal-related actions for each robot. These relevant actions can be pre-planned to form long-horizon subtrees, significantly enhancing the planning speed and collaboration efficiency of MRBTP. We evaluate our algorithm in warehouse management and everyday service scenarios. Results demonstrate MRBTP&#39;s robustness and execution efficiency under varying settings, as well as the ability of the pre-trained LLM to generate effective task-specific subtrees for MRBTP. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.18072v1-abstract-full').style.display = 'none'; document.getElementById('2502.18072v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.17334">arXiv:2502.17334</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.17334">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Optics">physics.optics</span> </div> </div> <p class="title is-5 mathjax"> Optical Propulsion and Levitation of Metajets </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Kudtarkar%2C+K">Kaushik Kudtarkar</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+Y">Yixin Chen</a>, <a href="/search/?searchtype=author&amp;query=Cai%2C+Z">Ziqiang Cai</a>, <a href="/search/?searchtype=author&amp;query=Cunha%2C+P">Preston Cunha</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+X">Xinyi Wang</a>, <a href="/search/?searchtype=author&amp;query=Lin%2C+S">Sam Lin</a>, <a href="/search/?searchtype=author&amp;query=Wong%2C+Z+J">Zi Jing Wong</a>, <a href="/search/?searchtype=author&amp;query=Liu%2C+Y">Yongmin Liu</a>, <a href="/search/?searchtype=author&amp;query=Lan%2C+S">Shoufeng Lan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.17334v1-abstract-short" style="display: inline;"> The quintessential hallmark distinguishing metasurfaces from traditional optical components is the engineering of subwavelength meta-atoms to manipulate light at will. Enabling this freedom, in a reverse manner, to control objects constituted by metasurfaces could expand our capability of optical manipulation to go beyond the predominant microscopic and sub-microscopic scales. 
Here, we introduce a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.17334v1-abstract-full').style.display = 'inline'; document.getElementById('2502.17334v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.17334v1-abstract-full" style="display: none;"> The quintessential hallmark distinguishing metasurfaces from traditional optical components is the engineering of subwavelength meta-atoms to manipulate light at will. Enabling this freedom, in a reverse manner, to control objects constituted by metasurfaces could expand our capability of optical manipulation to go beyond the predominant microscopic and sub-microscopic scales. Here, we introduce a driving metaphotonic force fully controllable by meta-atoms to manipulate structured objects named metajets. Upon Newton&#39;s law of motion that can apply to classical and relativistic mechanics, we develop a first-principles theory to analyze optical forces generated by refraction and reflection at an interface. We find that three-dimensional motions of metajets would be possible if one could introduce an extra wavevector component. We achieve that by creating a spatially distributed phase gradient with deliberately arranged silicon nanopillars. Our experiments and simulations reveal an in-plane propulsion and, very importantly, out-of-plane levitation of the metajets, aligning well with the theory. We also find that the metaphotonic force augments with increased light power but is not limited by the size of metajets, which could unleash new opportunities for metaphotonic control in large settings, such as interstellar light sails. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.17334v1-abstract-full').style.display = 'none'; document.getElementById('2502.17334v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.16595">arXiv:2502.16595</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.16595">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Optics">physics.optics</span> </div> </div> <p class="title is-5 mathjax"> Ultrasound-Coupled Microdroplet Laser Chip for High-Throughput Hyperlipidemia Screening </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Li%2C+Z">Zhonghao Li</a>, <a href="/search/?searchtype=author&amp;query=Cai%2C+Z">Zhihan Cai</a>, <a href="/search/?searchtype=author&amp;query=Gong%2C+C">Chaoyang Gong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.16595v1-abstract-short" style="display: inline;"> The mechanical properties of biological fluids can serve as early indicators of disease, offering valuable insights into complex physiological and pathological processes. However, the existing technologies can hardly support high throughput measurement, which hinders their broad applications in disease diagnosis. 
Here, we propose the ultrasound-coupled microdroplet laser chips to enable high-throu&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.16595v1-abstract-full').style.display = 'inline'; document.getElementById('2502.16595v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.16595v1-abstract-full" style="display: none;"> The mechanical properties of biological fluids can serve as early indicators of disease, offering valuable insights into complex physiological and pathological processes. However, the existing technologies can hardly support high throughput measurement, which hinders their broad applications in disease diagnosis. Here, we propose the ultrasound-coupled microdroplet laser chips to enable high-throughput measurement of the intrinsic mechanical properties of fluids. The microdroplets supporting high-Q (10^4) whispering gallery modes (WGM) lasing were massively fabricated on a hydrophobic surface with inkjet printing. The ultrasound was used to actuate the mechanical vibration of the microdroplets. We found that the stimulus-response of the laser emission is strongly dependent on the intrinsic mechanical properties of the liquid, which was subsequently employed to quantify the viscosity. The ultrasound-coupled microdroplet laser chips were used to monitor molecular interactions of bovine serum albumin. High-throughput screening of hyperlipidemia disease was also demonstrated by performing over 2,000 measurements using fast laser scanning. Thanks to the small volume of the microdroplets, a single drop of blood can support over eight billion measurements. The high-throughput ability and small sample consumption of the microlaser chip make it a promising tool for clinical diagnoses based on mechanical properties. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.16595v1-abstract-full').style.display = 'none'; document.getElementById('2502.16595v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.15348">arXiv:2502.15348</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.15348">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Constructing a Norm for Children&#39;s Scientific Drawing: Distribution Features Based on Semantic Similarity of Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Zhang%2C+Y">Yi Zhang</a>, <a href="/search/?searchtype=author&amp;query=Wei%2C+F">Fan Wei</a>, <a href="/search/?searchtype=author&amp;query=Li%2C+J">Jingyi Li</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+Y">Yan Wang</a>, <a href="/search/?searchtype=author&amp;query=Yu%2C+Y">Yanyan Yu</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+J">Jianli Chen</a>, <a href="/search/?searchtype=author&amp;query=Cai%2C+Z">Zipo Cai</a>, <a href="/search/?searchtype=author&amp;query=Liu%2C+X">Xinyu Liu</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+W">Wei Wang</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+P">Peng Wang</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+Z">Zhong Wang</a> 
</p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.15348v1-abstract-short" style="display: inline;"> The use of children&#39;s drawings to examine their conceptual understanding has been proven to be an effective method, but there are two major problems with previous research: 1. The content of the drawings heavily relies on the task, and the ecological validity of the conclusions is low; 2. The interpretation of drawings relies too much on the subjective feelings of the researchers. To address thi&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.15348v1-abstract-full').style.display = 'inline'; document.getElementById('2502.15348v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.15348v1-abstract-full" style="display: none;"> The use of children&#39;s drawings to examine their conceptual understanding has been proven to be an effective method, but there are two major problems with previous research: 1. The content of the drawings heavily relies on the task, and the ecological validity of the conclusions is low; 2. The interpretation of drawings relies too much on the subjective feelings of the researchers. To address this issue, this study uses the Large Language Model (LLM) to identify 1420 children&#39;s scientific drawings (covering 9 scientific themes/concepts), and uses the word2vec algorithm to calculate their semantic similarity. The study explores whether there are consistent drawing representations for children on the same theme, and attempts to establish a norm for children&#39;s scientific drawings, providing a baseline reference for follow-up children&#39;s drawing research. 
The results show that the representation of most drawings has consistency, manifested as most semantic similarity greater than 0.8. At the same time, it was found that the consistency of the representation is independent of the accuracy (of LLM&#39;s recognition), indicating the existence of consistency bias. In the subsequent exploration of influencing factors, we used Kendall rank correlation coefficient to investigate the effects of Sample Size, Abstract Degree, and Focus Points on drawings, and used word frequency statistics to explore whether children represented abstract themes/concepts by reproducing what was taught in class. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.15348v1-abstract-full').style.display = 'none'; document.getElementById('2502.15348v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.13943">arXiv:2502.13943</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.13943">pdf</a>, <a href="https://arxiv.org/format/2502.13943">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> AdaptiveStep: Automatically Dividing Reasoning Step through Model Confidence </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Liu%2C+Y">Yuliang Liu</a>, <a href="/search/?searchtype=author&amp;query=Lu%2C+J">Junjie Lu</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+Z">Zhaoling Chen</a>, <a href="/search/?searchtype=author&amp;query=Qu%2C+C">Chaofeng Qu</a>, <a href="/search/?searchtype=author&amp;query=Liu%2C+J+K">Jason Klein Liu</a>, <a href="/search/?searchtype=author&amp;query=Liu%2C+C">Chonghan Liu</a>, <a href="/search/?searchtype=author&amp;query=Cai%2C+Z">Zefan Cai</a>, <a href="/search/?searchtype=author&amp;query=Xia%2C+Y">Yunhui Xia</a>, <a href="/search/?searchtype=author&amp;query=Zhao%2C+L">Li Zhao</a>, <a href="/search/?searchtype=author&amp;query=Bian%2C+J">Jiang Bian</a>, <a href="/search/?searchtype=author&amp;query=Zhang%2C+C">Chuheng Zhang</a>, <a href="/search/?searchtype=author&amp;query=Shen%2C+W">Wei Shen</a>, <a href="/search/?searchtype=author&amp;query=Lin%2C+Z">Zhouhan Lin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" 
id="2502.13943v1-abstract-short" style="display: inline;"> Current approaches for training Process Reward Models (PRMs) often involve breaking down responses into multiple reasoning steps using rule-based techniques, such as using predefined placeholder tokens or setting the reasoning step&#39;s length into a fixed size. These approaches overlook the fact that specific words do not typically mark true decision points in a text. To address this, we propose Ada&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13943v1-abstract-full').style.display = 'inline'; document.getElementById('2502.13943v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.13943v1-abstract-full" style="display: none;"> Current approaches for training Process Reward Models (PRMs) often involve breaking down responses into multiple reasoning steps using rule-based techniques, such as using predefined placeholder tokens or setting the reasoning step&#39;s length into a fixed size. These approaches overlook the fact that specific words do not typically mark true decision points in a text. To address this, we propose AdaptiveStep, a method that divides reasoning steps based on the model&#39;s confidence in predicting the next word. This division method provides more decision-making information at each step, enhancing downstream tasks, such as reward model learning. Moreover, our method does not require manual annotation. We demonstrate its effectiveness through experiments with AdaptiveStep-trained PRMs in mathematical reasoning and code generation tasks. Experimental results indicate that the outcome PRM achieves state-of-the-art Best-of-N performance, surpassing greedy search strategy with token-level value-guided decoding, while also reducing construction costs by over 30% compared to existing open-source PRMs. 
In addition, we provide a thorough analysis and case study on the PRM&#39;s performance, transferability, and generalization capabilities. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13943v1-abstract-full').style.display = 'none'; document.getElementById('2502.13943v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12574">arXiv:2502.12574</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.12574">pdf</a>, <a href="https://arxiv.org/format/2502.12574">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> HeadInfer: Memory-Efficient LLM Inference by Head-wise Offloading </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Luo%2C+C">Cheng Luo</a>, <a href="/search/?searchtype=author&amp;query=Cai%2C+Z">Zefan Cai</a>, <a href="/search/?searchtype=author&amp;query=Sun%2C+H">Hanshi Sun</a>, <a href="/search/?searchtype=author&amp;query=Xiao%2C+J">Jinqi Xiao</a>, <a href="/search/?searchtype=author&amp;query=Yuan%2C+B">Bo Yuan</a>, <a href="/search/?searchtype=author&amp;query=Xiao%2C+W">Wen Xiao</a>, <a href="/search/?searchtype=author&amp;query=Hu%2C+J">Junjie 
Hu</a>, <a href="/search/?searchtype=author&amp;query=Zhao%2C+J">Jiawei Zhao</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+B">Beidi Chen</a>, <a href="/search/?searchtype=author&amp;query=Anandkumar%2C+A">Anima Anandkumar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.12574v1-abstract-short" style="display: inline;"> Transformer-based large language models (LLMs) demonstrate impressive performance in long context generation. Extending the context length has disproportionately shifted the memory footprint of LLMs during inference to the key-value cache (KV cache). In this paper, we propose HEADINFER, which offloads the KV cache to CPU RAM while avoiding the need to fully store the KV cache for any transformer l&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12574v1-abstract-full').style.display = 'inline'; document.getElementById('2502.12574v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.12574v1-abstract-full" style="display: none;"> Transformer-based large language models (LLMs) demonstrate impressive performance in long context generation. Extending the context length has disproportionately shifted the memory footprint of LLMs during inference to the key-value cache (KV cache). In this paper, we propose HEADINFER, which offloads the KV cache to CPU RAM while avoiding the need to fully store the KV cache for any transformer layer on the GPU. HEADINFER employs a fine-grained, head-wise offloading strategy, maintaining only selective attention heads KV cache on the GPU while computing attention output dynamically. Through roofline analysis, we demonstrate that HEADINFER maintains computational efficiency while significantly reducing memory footprint. 
We evaluate HEADINFER on the Llama-3-8B model with a 1-million-token sequence, reducing the GPU memory footprint of the KV cache from 128 GB to 1 GB and the total GPU memory usage from 207 GB to 17 GB, achieving a 92% reduction compared to BF16 baseline inference. Notably, HEADINFER enables 4-million-token inference with an 8B model on a single consumer GPU with 24GB memory (e.g., NVIDIA RTX 4090) without approximation methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12574v1-abstract-full').style.display = 'none'; document.getElementById('2502.12574v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11285">arXiv:2502.11285</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.11285">pdf</a>, <a href="https://arxiv.org/format/2502.11285">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> </div> </div> <p class="title is-5 mathjax"> Quantum Error Mitigation for Sampling Algorithms </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Liu%2C+K">Kecheng Liu</a>, <a href="/search/?searchtype=author&amp;query=Cai%2C+Z">Zhenyu Cai</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.11285v1-abstract-short" style="display: inline;"> Recent experimental breakthroughs have signalled the imminent arrival of the early fault-tolerant era. 
However, for a considerable period in the foreseeable future, relying solely on quantum error correction for full error suppression will remain extremely challenging due to its substantial hardware overhead. Additional help from quantum error mitigation (QEM) is essential for bridging this gap to&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11285v1-abstract-full').style.display = 'inline'; document.getElementById('2502.11285v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11285v1-abstract-full" style="display: none;"> Recent experimental breakthroughs have signalled the imminent arrival of the early fault-tolerant era. However, for a considerable period in the foreseeable future, relying solely on quantum error correction for full error suppression will remain extremely challenging due to its substantial hardware overhead. Additional help from quantum error mitigation (QEM) is essential for bridging this gap towards achieving quantum advantage. The application of QEM has so far been restricted to expectation value estimation, leaving its extension to sampling-based algorithms -- which is expected to play a pivotal role in the early fault-tolerant era -- an unresolved challenge. In this work, we present a framework for applying any QEM techniques to obtain the error-mitigated output distribution, showing that this incurs no greater cost than estimating a single observable. We also devised a way to sample from this distribution and constructed an explicit scheme for applying any QEM methods to quantum phase estimation, which can be generalised to other sampling algorithms. Numerical experiments were conducted to validate the efficacy of these methods. 
We believe our methods significantly broaden the scope of QEM, extending its applicability to most algorithms of practical interest and forming a crucial step towards realising quantum advantage. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11285v1-abstract-full').style.display = 'none'; document.getElementById('2502.11285v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.10203">arXiv:2502.10203</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.10203">pdf</a>, <a href="https://arxiv.org/format/2502.10203">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> AI-in-the-Loop Sensing and Communication Joint Design for Edge Intelligence </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Cai%2C+Z">Zhijie Cai</a>, <a href="/search/?searchtype=author&amp;query=Cao%2C+X">Xiaowen Cao</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+X">Xu Chen</a>, <a href="/search/?searchtype=author&amp;query=Cui%2C+Y">Yuanhao Cui</a>, <a href="/search/?searchtype=author&amp;query=Zhu%2C+G">Guangxu Zhu</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+K">Kaibin Huang</a>, <a href="/search/?searchtype=author&amp;query=Cui%2C+S">Shuguang Cui</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.10203v1-abstract-short" style="display: inline;"> Recent breakthroughs in artificial intelligence (AI), wireless communications, and sensing technologies have accelerated the evolution of edge intelligence. However, conventional systems still grapple with issues such as low communication efficiency, redundant data acquisition, and poor model generalization. To overcome these challenges, we propose an innovative framework that enhances edge intell&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10203v1-abstract-full').style.display = 'inline'; document.getElementById('2502.10203v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.10203v1-abstract-full" style="display: none;"> Recent breakthroughs in artificial intelligence (AI), wireless communications, and sensing technologies have accelerated the evolution of edge intelligence. However, conventional systems still grapple with issues such as low communication efficiency, redundant data acquisition, and poor model generalization. To overcome these challenges, we propose an innovative framework that enhances edge intelligence through AI-in-the-loop joint sensing and communication (JSAC). This framework features an AI-driven closed-loop control architecture that jointly optimizes system resources, thereby delivering superior system-level performance. A key contribution of our work is establishing an explicit relationship between validation loss and the system&#39;s tunable parameters. This insight enables dynamic reduction of the generalization error through AI-driven closed-loop control. 
Specifically, for sensing control, we introduce an adaptive data collection strategy based on gradient importance sampling, allowing edge devices to autonomously decide when to terminate data acquisition and how to allocate sample weights based on real-time model feedback. For communication control, drawing inspiration from stochastic gradient Langevin dynamics (SGLD), our joint optimization of transmission power and batch size converts channel and data noise into gradient perturbations that help mitigate overfitting. Experimental evaluations demonstrate that our framework reduces communication energy consumption by up to 77 percent and sensing costs measured by the number of collected samples by up to 52 percent while significantly improving model generalization -- with up to 58 percent reductions of the final validation loss. It validates that the proposed scheme can harvest the mutual benefit of AI and JSAC systems by incorporating the model itself into the control loop of the system. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10203v1-abstract-full').style.display = 'none'; document.getElementById('2502.10203v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05805">arXiv:2502.05805</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.05805">pdf</a>, <a href="https://arxiv.org/format/2502.05805">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Astrophysics of Galaxies">astro-ph.GA</span> </div> </div> <p class="title is-5 mathjax"> The SUPERCOLD-CGM survey: II. [\ion{C}{1}]$(1-0)$ emission and the physical conditions of cold gas in Enormous Ly$α$ nebulae at $z\,\sim\,2$ </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Li%2C+J">Jianan Li</a>, <a href="/search/?searchtype=author&amp;query=Emonts%2C+B+H+C">Bjorn H. C. Emonts</a>, <a href="/search/?searchtype=author&amp;query=Cai%2C+Z">Zheng Cai</a>, <a href="/search/?searchtype=author&amp;query=Li%2C+J">Jianrui Li</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+R">Ran Wang</a>, <a href="/search/?searchtype=author&amp;query=Villar-Mart%C3%ADn%2C+M">Montserrat Villar-Martín</a>, <a href="/search/?searchtype=author&amp;query=Battaia%2C+F+A">Fabrizio Arrigoni Battaia</a>, <a href="/search/?searchtype=author&amp;query=Li%2C+M">Mingyu Li</a>, <a href="/search/?searchtype=author&amp;query=Wu%2C+Y">Yunjing Wu</a>, <a href="/search/?searchtype=author&amp;query=Yoon%2C+I">Ilsang Yoon</a>, <a href="/search/?searchtype=author&amp;query=Lehnert%2C+M+D">Matthew D. 
Lehnert</a>, <a href="/search/?searchtype=author&amp;query=Massingill%2C+K">Kyle Massingill</a>, <a href="/search/?searchtype=author&amp;query=Sarazin%2C+C">Craig Sarazin</a>, <a href="/search/?searchtype=author&amp;query=Prochaska%2C+J+X">Jason X Prochaska</a>, <a href="/search/?searchtype=author&amp;query=Lacy%2C+M">Mark Lacy</a>, <a href="/search/?searchtype=author&amp;query=Mason%2C+B">Brian Mason</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05805v1-abstract-short" style="display: inline;"> We report ALMA and ACA observations of atomic carbon ([\ion{C}{1}]$(1-0)$) and dust continuum in 10 Enormous Ly$α$ Nebulae hosting ultra-luminous Type-I QSOs at $z=2.2-2.5$, as part of the SUrvey of Protocluster ELANe Revealing CO/CI in the Ly$α$ Detected CGM (SUPERCOLD-CGM). We detect [\ion{C}{1}]$(1-0)$ and dust in all ten QSOs and five companion galaxies. We find that the QSOs and companions ha&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05805v1-abstract-full').style.display = 'inline'; document.getElementById('2502.05805v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05805v1-abstract-full" style="display: none;"> We report ALMA and ACA observations of atomic carbon ([\ion{C}{1}]$(1-0)$) and dust continuum in 10 Enormous Ly$α$ Nebulae hosting ultra-luminous Type-I QSOs at $z=2.2-2.5$, as part of the SUrvey of Protocluster ELANe Revealing CO/CI in the Ly$α$ Detected CGM (SUPERCOLD-CGM). We detect [\ion{C}{1}]$(1-0)$ and dust in all ten QSOs and five companion galaxies. We find that the QSOs and companions have higher gas densities and more intense radiation fields than Luminous Infrared galaxies and high-$z$ main sequence galaxies, with the highest values found in the QSOs. 
By comparing molecular gas masses derived from [\ion{C}{1}]$(1-0)$, CO(4$-$3) and dust continuum, we find that the QSOs and companions display a similar low CO conversion factor of $α_{\rm CO}$\,$\sim$\,0.8 $\rm M_{\sun}$${[\rm K\,km/s\,pc^2]}^{-1}$. After tapering our data to low resolution, the [\ion{C}{1}]$(1-0)$ flux increases for nine QSOs, hinting at the possibility of [\ion{C}{1}]$(1-0)$ in the circum-galactic medium (CGM) on a scale of 16$-$40 kpc. However, the [\ion{C}{1}]$(1-0)$ sensitivity is too low to confirm this for individual targets, except for a tentative (2.7$σ$) CGM detection in Q0050+0051{} with M$_{\rm H_2}$\,=\, ($1.0 - 2.8$)$\times 10^{10}$ $\rm M_{\sun}$. The 3$σ$ mass limits of molecular CGM for the remaining QSO fields are ($0.2-1.4$)\,$\times$\,10$^{10}$ $\rm M_{\sun}$. This translates into a baryon fraction of $&lt;$0.4-3$\% $ in the molecular CGM relative to the total baryonic halo mass. Our sample also includes a radio-detected AGN, Q1416+2649{}, which shows [\ion{C}{1}]$(1-0)$ and CO(4$-$3) luminosities an order of magnitude fainter for its far-infrared luminosity than other QSOs in our sample, possibly due to a lower molecular gas mass. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05805v1-abstract-full').style.display = 'none'; document.getElementById('2502.05805v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">27 pages, 9 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05674">arXiv:2502.05674</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.05674">pdf</a>, <a href="https://arxiv.org/format/2502.05674">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> Less is More for Synthetic Speech Detection in the Wild </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Garg%2C+A">Ashi Garg</a>, <a href="/search/?searchtype=author&amp;query=Cai%2C+Z">Zexin Cai</a>, <a href="/search/?searchtype=author&amp;query=Xinyuan%2C+H+L">Henry Li Xinyuan</a>, <a href="/search/?searchtype=author&amp;query=Garc%C3%ADa-Perera%2C+L+P">Leibny Paola García-Perera</a>, <a href="/search/?searchtype=author&amp;query=Duh%2C+K">Kevin Duh</a>, <a href="/search/?searchtype=author&amp;query=Khudanpur%2C+S">Sanjeev Khudanpur</a>, <a href="/search/?searchtype=author&amp;query=Wiesner%2C+M">Matthew Wiesner</a>, <a href="/search/?searchtype=author&amp;query=Andrews%2C+N">Nicholas Andrews</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05674v3-abstract-short" style="display: inline;"> Driven by advances in self-supervised learning for speech, state-of-the-art synthetic speech detectors have achieved low error rates on popular benchmarks such as ASVspoof. 
However, prior benchmarks do not address the wide range of real-world variability in speech. Are reported error rates realistic in real-world conditions? To assess detector failure modes and robustness under controlled distribu&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05674v3-abstract-full').style.display = 'inline'; document.getElementById('2502.05674v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05674v3-abstract-full" style="display: none;"> Driven by advances in self-supervised learning for speech, state-of-the-art synthetic speech detectors have achieved low error rates on popular benchmarks such as ASVspoof. However, prior benchmarks do not address the wide range of real-world variability in speech. Are reported error rates realistic in real-world conditions? To assess detector failure modes and robustness under controlled distribution shifts, we introduce ShiftySpeech, a benchmark with more than 3000 hours of synthetic speech from 7 domains, 6 TTS systems, 12 vocoders, and 3 languages. We found that all distribution shifts degraded model performance, and contrary to prior findings, training on more vocoders, speakers, or with data augmentation did not guarantee better generalization. In fact, we found that training on less diverse data resulted in better generalization, and that a detector fit using samples from a single carefully selected vocoder and a small number of speakers, without data augmentations, achieved state-of-the-art results on the challenging In-the-Wild benchmark. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05674v3-abstract-full').style.display = 'none'; document.getElementById('2502.05674v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05209">arXiv:2502.05209</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.05209">pdf</a>, <a href="https://arxiv.org/format/2502.05209">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Model Tampering Attacks Enable More Rigorous Evaluations of LLM Capabilities </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Che%2C+Z">Zora Che</a>, <a href="/search/?searchtype=author&amp;query=Casper%2C+S">Stephen Casper</a>, <a href="/search/?searchtype=author&amp;query=Kirk%2C+R">Robert Kirk</a>, <a href="/search/?searchtype=author&amp;query=Satheesh%2C+A">Anirudh Satheesh</a>, <a href="/search/?searchtype=author&amp;query=Slocum%2C+S">Stewart Slocum</a>, <a href="/search/?searchtype=author&amp;query=McKinney%2C+L+E">Lev E McKinney</a>, <a href="/search/?searchtype=author&amp;query=Gandikota%2C+R">Rohit Gandikota</a>, <a href="/search/?searchtype=author&amp;query=Ewart%2C+A">Aidan Ewart</a>, <a 
href="/search/?searchtype=author&amp;query=Rosati%2C+D">Domenic Rosati</a>, <a href="/search/?searchtype=author&amp;query=Wu%2C+Z">Zichu Wu</a>, <a href="/search/?searchtype=author&amp;query=Cai%2C+Z">Zikui Cai</a>, <a href="/search/?searchtype=author&amp;query=Chughtai%2C+B">Bilal Chughtai</a>, <a href="/search/?searchtype=author&amp;query=Gal%2C+Y">Yarin Gal</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+F">Furong Huang</a>, <a href="/search/?searchtype=author&amp;query=Hadfield-Menell%2C+D">Dylan Hadfield-Menell</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05209v1-abstract-short" style="display: inline;"> Evaluations of large language model (LLM) risks and capabilities are increasingly being incorporated into AI risk management and governance frameworks. Currently, most risk evaluations are conducted by designing inputs that elicit harmful behaviors from the system. However, a fundamental limitation of this approach is that the harmfulness of the behaviors identified during any particular evaluatio&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05209v1-abstract-full').style.display = 'inline'; document.getElementById('2502.05209v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05209v1-abstract-full" style="display: none;"> Evaluations of large language model (LLM) risks and capabilities are increasingly being incorporated into AI risk management and governance frameworks. Currently, most risk evaluations are conducted by designing inputs that elicit harmful behaviors from the system. However, a fundamental limitation of this approach is that the harmfulness of the behaviors identified during any particular evaluation can only lower bound the model&#39;s worst-possible-case behavior. 
As a complementary method for eliciting harmful behaviors, we propose evaluating LLMs with model tampering attacks which allow for modifications to latent activations or weights. We pit state-of-the-art techniques for removing harmful LLM capabilities against a suite of 5 input-space and 6 model tampering attacks. In addition to benchmarking these methods against each other, we show that (1) model resilience to capability elicitation attacks lies on a low-dimensional robustness subspace; (2) the attack success rate of model tampering attacks can empirically predict and offer conservative estimates for the success of held-out input-space attacks; and (3) state-of-the-art unlearning methods can easily be undone within 16 steps of fine-tuning. Together these results highlight the difficulty of removing harmful LLM capabilities and show that model tampering attacks enable substantially more rigorous evaluations than input-space attacks alone. We release models at https://huggingface.co/LLM-GAT <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05209v1-abstract-full').style.display = 'none'; document.getElementById('2502.05209v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.04627">arXiv:2502.04627</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.04627">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Optics">physics.optics</span> </div> </div> <p class="title is-5 mathjax"> WGM microprobe device for high-sensitivity and broadband ultrasound detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Sun%2C+J">Jialve Sun</a>, <a href="/search/?searchtype=author&amp;query=Huangfu%2C+S">Shengnan Huangfu</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+T">Tinglan Chen</a>, <a href="/search/?searchtype=author&amp;query=Cai%2C+Z">Zijing Cai</a>, <a href="/search/?searchtype=author&amp;query=Ruan%2C+B">Bowen Ruan</a>, <a href="/search/?searchtype=author&amp;query=Zhang%2C+F">Fangxing Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.04627v2-abstract-short" style="display: inline;"> Whispering-gallery-mode (WGM) microcavities have emerged as a promising alternative to traditional ultrasound probes, offering high sensitivity and wide bandwidth. 
In our research, we propose a novel silica WGM microprobe device, with impressive Q factors up to 10^7. The side-coupled approach and special encapsulation design make the device small, robust, and capable of utilizing in both gaseous an&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04627v2-abstract-full').style.display = 'inline'; document.getElementById('2502.04627v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.04627v2-abstract-full" style="display: none;"> Whispering-gallery-mode (WGM) microcavities have emerged as a promising alternative to traditional ultrasound probes, offering high sensitivity and wide bandwidth. In our research, we propose a novel silica WGM microprobe device, with impressive Q factors up to 10^7. The side-coupled approach and special encapsulation design make the device small, robust, and capable of utilizing in both gaseous and liquid environments. We have successfully conducted photoacoustic (PA) imaging on various samples using this device which demonstrates a high sensitivity of 5.4 mPa/sqrt(Hz) and a broad bandwidth of 41 MHz at -6 dB for ultrasound. What&#39;s more, it&#39;s capable of capturing the vibration spectrum of microparticles up to a few hundred megahertz. Our compact and lightweight device exhibits significant application potential in PA endoscopic detection, near-field ultrasound sensing and other aspects. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04627v2-abstract-full').style.display = 'none'; document.getElementById('2502.04627v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.04519">arXiv:2502.04519</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.04519">pdf</a>, <a href="https://arxiv.org/format/2502.04519">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> GenVC: Self-Supervised Zero-Shot Voice Conversion </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Cai%2C+Z">Zexin Cai</a>, <a href="/search/?searchtype=author&amp;query=Xinyuan%2C+H+L">Henry Li Xinyuan</a>, <a href="/search/?searchtype=author&amp;query=Garg%2C+A">Ashi Garg</a>, <a href="/search/?searchtype=author&amp;query=Garc%C3%ADa-Perera%2C+L+P">Leibny Paola García-Perera</a>, <a href="/search/?searchtype=author&amp;query=Duh%2C+K">Kevin Duh</a>, <a href="/search/?searchtype=author&amp;query=Khudanpur%2C+S">Sanjeev Khudanpur</a>, <a href="/search/?searchtype=author&amp;query=Wiesner%2C+M">Matthew Wiesner</a>, <a href="/search/?searchtype=author&amp;query=Andrews%2C+N">Nicholas Andrews</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.04519v1-abstract-short" style="display: inline;"> Zero-shot voice conversion has recently made substantial progress, but many models still depend on external supervised systems to disentangle speaker identity and linguistic content. Furthermore, current methods often use parallel conversion, where the converted speech inherits the source utterance&#39;s temporal structure, restricting speaker similarity and privacy. To overcome these limitations, we&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04519v1-abstract-full').style.display = 'inline'; document.getElementById('2502.04519v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.04519v1-abstract-full" style="display: none;"> Zero-shot voice conversion has recently made substantial progress, but many models still depend on external supervised systems to disentangle speaker identity and linguistic content. Furthermore, current methods often use parallel conversion, where the converted speech inherits the source utterance&#39;s temporal structure, restricting speaker similarity and privacy. To overcome these limitations, we introduce GenVC, a generative zero-shot voice conversion model. GenVC learns to disentangle linguistic content and speaker style in a self-supervised manner, eliminating the need for external models and enabling efficient training on large, unlabeled datasets. Experimental results show that GenVC achieves state-of-the-art speaker similarity while maintaining naturalness competitive with leading approaches. Its autoregressive generation also allows the converted speech to deviate from the source utterance&#39;s temporal structure. 
This feature makes GenVC highly effective for voice anonymization, as it minimizes the preservation of source prosody and speaker characteristics, enhancing privacy protection. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04519v1-abstract-full').style.display = 'none'; document.getElementById('2502.04519v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.01612">arXiv:2502.01612</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.01612">pdf</a>, <a href="https://arxiv.org/format/2502.01612">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Self-Improving Transformers Overcome Easy-to-Hard and Length Generalization Challenges </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Lee%2C+N">Nayoung Lee</a>, <a href="/search/?searchtype=author&amp;query=Cai%2C+Z">Ziyang Cai</a>, <a href="/search/?searchtype=author&amp;query=Schwarzschild%2C+A">Avi Schwarzschild</a>, <a href="/search/?searchtype=author&amp;query=Lee%2C+K">Kangwook Lee</a>, <a href="/search/?searchtype=author&amp;query=Papailiopoulos%2C+D">Dimitris Papailiopoulos</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.01612v2-abstract-short" 
style="display: inline;"> Large language models often struggle with length generalization and solving complex problem instances beyond their training distribution. We present a self-improvement approach where models iteratively generate and learn from their own solutions, progressively tackling harder problems while maintaining a standard transformer architecture. Across diverse tasks including arithmetic, string manipulat&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01612v2-abstract-full').style.display = 'inline'; document.getElementById('2502.01612v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.01612v2-abstract-full" style="display: none;"> Large language models often struggle with length generalization and solving complex problem instances beyond their training distribution. We present a self-improvement approach where models iteratively generate and learn from their own solutions, progressively tackling harder problems while maintaining a standard transformer architecture. Across diverse tasks including arithmetic, string manipulation, and maze solving, self-improving enables models to solve problems far beyond their initial training distribution-for instance, generalizing from 10-digit to 100-digit addition without apparent saturation. We observe that in some cases filtering for correct self-generated examples leads to exponential improvements in out-of-distribution performance across training rounds. Additionally, starting from pretrained models significantly accelerates this self-improvement process for several tasks. Our results demonstrate how controlled weak-to-strong curricula can systematically teach a model logical extrapolation without any changes to the positional embeddings, or the model architecture. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01612v2-abstract-full').style.display = 'none'; document.getElementById('2502.01612v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Added references</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.01299">arXiv:2502.01299</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.01299">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Neurons and Cognition">q-bio.NC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Probabilistic adaptation of language comprehension for individual speakers: Evidence from neural oscillations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Wu%2C+H">Hanlin Wu</a>, <a href="/search/?searchtype=author&amp;query=Rao%2C+X">Xiaohui Rao</a>, <a href="/search/?searchtype=author&amp;query=Cai%2C+Z+G">Zhenguang G. 
Cai</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.01299v1-abstract-short" style="display: inline;"> Listeners adapt language comprehension based on their mental representations of speakers, but how these representations are dynamically updated remains unclear. We investigated whether listeners probabilistically adapt their comprehension based on the likelihood of speakers producing stereotype-incongruent utterances. Our findings reveal two potential mechanisms: a speaker-general mechanism that a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01299v1-abstract-full').style.display = 'inline'; document.getElementById('2502.01299v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.01299v1-abstract-full" style="display: none;"> Listeners adapt language comprehension based on their mental representations of speakers, but how these representations are dynamically updated remains unclear. We investigated whether listeners probabilistically adapt their comprehension based on the likelihood of speakers producing stereotype-incongruent utterances. Our findings reveal two potential mechanisms: a speaker-general mechanism that adjusts overall expectations about speaker-content relationships, and a speaker-specific mechanism that updates individual speaker models. In two EEG experiments, participants heard speakers make stereotype-congruent or incongruent utterances, with incongruency base rate manipulated between blocks. In Experiment 1, speaker incongruency modulated both high-beta (21-30 Hz) and theta (4-6 Hz) oscillations: incongruent utterances decreased oscillatory power in low base rate condition but increased it in high base rate condition. 
The theta effect varied with listeners&#39; openness trait: less open participants showed theta increases to speaker-incongruencies, suggesting maintenance of speaker-specific information, while more open participants showed theta decreases, indicating flexible model updating. In Experiment 2, we dissociated base rate from the target speaker by manipulating the overall base rate using an alternative non-target speaker. Only the high-beta effect persisted, showing power decrease for speaker-incongruencies in low base rate condition but no effect in high base rate condition. The high-beta oscillations might reflect the speaker-general adjustment, while theta oscillations may index the speaker-specific model updating. These findings provide evidence for how language processing is shaped by social cognition in real time. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01299v1-abstract-full').style.display = 'none'; document.getElementById('2502.01299v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.01136">arXiv:2502.01136</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.01136">pdf</a>, <a href="https://arxiv.org/format/2502.01136">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> </div> </div> <p class="title is-5 mathjax"> Dissipative quantum phase transitions monitored by current fluctuations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Matsumoto%2C+M">Masataka Matsumoto</a>, <a href="/search/?searchtype=author&amp;query=Baggioli%2C+M">Matteo Baggioli</a>, <a href="/search/?searchtype=author&amp;query=Cai%2C+Z">Zi Cai</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.01136v1-abstract-short" style="display: inline;"> Dissipative phase transitions (DPT) are defined by sudden changes in the physical properties of nonequilibrium open quantum systems and they present characteristics that have no analogue in closed and thermal systems. 
Several methods to detect and characterize DPT have been suggested in the literature, the most famous of which -- the $\textit{Liouvillian gap}$ -- can be derived from a spectral ana&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01136v1-abstract-full').style.display = 'inline'; document.getElementById('2502.01136v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.01136v1-abstract-full" style="display: none;"> Dissipative phase transitions (DPT) are defined by sudden changes in the physical properties of nonequilibrium open quantum systems and they present characteristics that have no analogue in closed and thermal systems. Several methods to detect and characterize DPT have been suggested in the literature, the most famous of which -- the $\textit{Liouvillian gap}$ -- can be derived from a spectral analysis of the Liouvillian super-operator that governs the complex interplay between coherent and dissipative dynamics. Here, we consider the $\textit{output current}$, defined as the average total quantum jumps per unit time between the open quantum system and the environment. We propose that output current fluctuations, and in particular their dynamical correlations, their power spectrum, and their characteristic timescale can provide valuable information about DPT, confirming a dramatic change of behavior at the critical point. We validate our proposal using the dissipative XYZ model and the nonlinear driven-dissipative Kerr model, showing good agreement with previous estimates of the location of the critical point. Compared to previous approaches, our proposal could be already experimentally tested in optical systems, providing a practical method to detect criticality in quantum open systems. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01136v1-abstract-full').style.display = 'none'; document.getElementById('2502.01136v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 14 figures, comments welcome</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00372">arXiv:2502.00372</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.00372">pdf</a>, <a href="https://arxiv.org/format/2502.00372">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> NAVER: A Neuro-Symbolic Compositional Automaton for Visual Grounding with Explicit Logic Reasoning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Cai%2C+Z">Zhixi Cai</a>, <a href="/search/?searchtype=author&amp;query=Ke%2C+F">Fucai Ke</a>, <a href="/search/?searchtype=author&amp;query=Jahangard%2C+S">Simindokht Jahangard</a>, <a href="/search/?searchtype=author&amp;query=de+la+Banda%2C+M+G">Maria Garcia de la Banda</a>, <a href="/search/?searchtype=author&amp;query=Haffari%2C+R">Reza Haffari</a>, <a href="/search/?searchtype=author&amp;query=Stuckey%2C+P+J">Peter J. 
Stuckey</a>, <a href="/search/?searchtype=author&amp;query=Rezatofighi%2C+H">Hamid Rezatofighi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.00372v1-abstract-short" style="display: inline;"> Visual Grounding (VG) tasks, such as referring expression detection and segmentation tasks are important for linking visual entities to context, especially in complex reasoning tasks that require detailed query interpretation. This paper explores VG beyond basic perception, highlighting challenges for methods that require reasoning like human cognition. Recent advances in large language methods (L&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00372v1-abstract-full').style.display = 'inline'; document.getElementById('2502.00372v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.00372v1-abstract-full" style="display: none;"> Visual Grounding (VG) tasks, such as referring expression detection and segmentation tasks are important for linking visual entities to context, especially in complex reasoning tasks that require detailed query interpretation. This paper explores VG beyond basic perception, highlighting challenges for methods that require reasoning like human cognition. Recent advances in large language methods (LLMs) and Vision-Language methods (VLMs) have improved abilities for visual comprehension, contextual understanding, and reasoning. These methods are mainly split into end-to-end and compositional methods, with the latter offering more flexibility. Compositional approaches that integrate LLMs and foundation models show promising performance but still struggle with complex reasoning with language-based logical representations. 
To address these limitations, we propose NAVER, a compositional visual grounding method that integrates explicit probabilistic logic reasoning within a finite-state automaton, equipped with a self-correcting mechanism. This design improves robustness and interpretability in inference through explicit logic reasoning. Our results show that NAVER achieves SoTA performance comparing to recent end-to-end and compositional baselines. The code is available at https://github.com/ControlNet/NAVER . <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00372v1-abstract-full').style.display = 'none'; document.getElementById('2502.00372v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00319">arXiv:2502.00319</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.00319">pdf</a>, <a href="https://arxiv.org/format/2502.00319">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Physics-Inspired Distributed Radio Map Estimation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Yang%2C+D">Dong Yang</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+Y">Yue Wang</a>, <a 
href="/search/?searchtype=author&amp;query=Zhang%2C+S">Songyang Zhang</a>, <a href="/search/?searchtype=author&amp;query=Li%2C+Y">Yingshu Li</a>, <a href="/search/?searchtype=author&amp;query=Cai%2C+Z">Zhipeng Cai</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.00319v1-abstract-short" style="display: inline;"> To gain panoramic awareness of spectrum coverage in complex wireless environments, data-driven learning approaches have recently been introduced for radio map estimation (RME). While existing deep learning based methods conduct RME given spectrum measurements gathered from dispersed sensors in the region of interest, they rely on centralized data at a fusion center, which however raises critical c&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00319v1-abstract-full').style.display = 'inline'; document.getElementById('2502.00319v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.00319v1-abstract-full" style="display: none;"> To gain panoramic awareness of spectrum coverage in complex wireless environments, data-driven learning approaches have recently been introduced for radio map estimation (RME). While existing deep learning based methods conduct RME given spectrum measurements gathered from dispersed sensors in the region of interest, they rely on centralized data at a fusion center, which however raises critical concerns on data privacy leakages and high communication overloads. Federated learning (FL) enhances data security and communication efficiency in RME by allowing multiple clients to collaborate in model training without directly sharing local data. 
However, the performance of the FL-based RME can be hindered by the problem of task heterogeneity across clients due to their unavailable or inaccurate landscaping information. To fill this gap, in this paper, we propose a physics-inspired distributed RME solution in the absence of landscaping information. The main idea is to develop a novel distributed RME framework empowered by leveraging the domain knowledge of radio propagation models, and by designing a new distributed learning approach that splits the entire RME model into two modules. A global autoencoder module is shared among clients to capture the common pathloss influence on radio propagation pattern, while a client-specific autoencoder module focuses on learning the individual features produced by local shadowing effects from the unique building distributions in local environment. Simulation results show that our proposed method outperforms the benchmarks in achieving higher performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00319v1-abstract-full').style.display = 'none'; document.getElementById('2502.00319v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.17729">arXiv:2501.17729</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.17729">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> A technical review of multi-omics data integration methods: from classical statistical to deep generative approaches </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Bai%C3%A3o%2C+A+R">Ana R. Baião</a>, <a href="/search/?searchtype=author&amp;query=Cai%2C+Z">Zhaoxiang Cai</a>, <a href="/search/?searchtype=author&amp;query=Poulos%2C+R+C">Rebecca C Poulos</a>, <a href="/search/?searchtype=author&amp;query=Robinson%2C+P+J">Phillip J. Robinson</a>, <a href="/search/?searchtype=author&amp;query=Reddel%2C+R+R">Roger R Reddel</a>, <a href="/search/?searchtype=author&amp;query=Zhong%2C+Q">Qing Zhong</a>, <a href="/search/?searchtype=author&amp;query=Vinga%2C+S">Susana Vinga</a>, <a href="/search/?searchtype=author&amp;query=Gon%C3%A7alves%2C+E">Emanuel Gonçalves</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.17729v1-abstract-short" style="display: inline;"> The rapid advancement of high-throughput sequencing and other assay technologies has resulted in the generation of large and complex multi-omics datasets, offering unprecedented opportunities for advancing precision medicine strategies. 
However, multi-omics data integration presents significant challenges due to the high dimensionality, heterogeneity, experimental gaps, and frequency of missing va&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.17729v1-abstract-full').style.display = 'inline'; document.getElementById('2501.17729v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.17729v1-abstract-full" style="display: none;"> The rapid advancement of high-throughput sequencing and other assay technologies has resulted in the generation of large and complex multi-omics datasets, offering unprecedented opportunities for advancing precision medicine strategies. However, multi-omics data integration presents significant challenges due to the high dimensionality, heterogeneity, experimental gaps, and frequency of missing values across data types. Computational methods have been developed to address these issues, employing statistical and machine learning approaches to uncover complex biological patterns and provide deeper insights into our understanding of disease mechanisms. Here, we comprehensively review state-of-the-art multi-omics data integration methods with a focus on deep generative models, particularly variational autoencoders (VAEs) that have been widely used for data imputation and augmentation, joint embedding creation, and batch effect correction. We explore the technical aspects of loss functions and regularisation techniques including adversarial training, disentanglement and contrastive learning. Moreover, we discuss recent advancements in foundation models and the integration of emerging data modalities, while describing the current limitations and outlining future directions for enhancing multi-modal methodologies in biomedical research. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.17729v1-abstract-full').style.display = 'none'; document.getElementById('2501.17729v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">43 pages, 4 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.17610">arXiv:2501.17610</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.17610">pdf</a>, <a href="https://arxiv.org/format/2501.17610">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> FeedSign: Robust Full-parameter Federated Fine-tuning of Large Models with Extremely Low Communication Overhead of One Bit </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Cai%2C+Z">Zhijie Cai</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+H">Haolong Chen</a>, <a href="/search/?searchtype=author&amp;query=Zhu%2C+G">Guangxu Zhu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.17610v1-abstract-short" style="display: inline;"> Federated fine-tuning (FFT) attempts to fine-tune a pre-trained model with private data from distributed clients by exchanging models rather than data under the orchestration of a parameter server (PS). 
To overcome the bottleneck forged by the growing communication and memory overhead for clients in such systems due to the growing model sizes, we propose \textit{FeedSign}, an FFT algorithm in whic&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.17610v1-abstract-full').style.display = 'inline'; document.getElementById('2501.17610v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.17610v1-abstract-full" style="display: none;"> Federated fine-tuning (FFT) attempts to fine-tune a pre-trained model with private data from distributed clients by exchanging models rather than data under the orchestration of a parameter server (PS). To overcome the bottleneck forged by the growing communication and memory overhead for clients in such systems due to the growing model sizes, we propose \textit{FeedSign}, an FFT algorithm in which the upload and download payload for an aggregation step is exactly $1$ bit per step, while the memory overhead is squeezed to the amount needed for inference. This is realized by utilizing zeroth-order (ZO) optimizers on large models and shared pseudo-random number generators (PRNG) across devices to represent the gradient estimates as seed-sign pairs. We conduct theoretical analysis on FeedSign and show that it converges at an exponential rate $\mathcal{O}(e^{-t})$, where $t$ is the number of elapsed steps under widely used assumptions. Moreover, FeedSign is found to be robust against data heterogeneity and Byzantine attacks. We conducted extensive experiments on models across different structures and sizes (11M to 13B) and found that the proposed method performs better or closely, depending on scenarios, compared to its ZO and FO counterparts, albeit with an orders-of-magnitude lower communication overhead. 
We also discuss some interesting advantages as byproducts guaranteed by the minimalistic design of \textit{FeedSign}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.17610v1-abstract-full').style.display = 'none'; document.getElementById('2501.17610v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.15974">arXiv:2501.15974</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.15974">pdf</a>, <a href="https://arxiv.org/ps/2501.15974">ps</a>, <a href="https://arxiv.org/format/2501.15974">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Nuclear Theory">nucl-th</span> </div> </div> <p class="title is-5 mathjax"> The correlation between the $α$-cluster separation and the neutron S-factor in $^{12}$Be </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Cai%2C+Z">Zhilian Cai</a>, <a href="/search/?searchtype=author&amp;query=Zhao%2C+Q">Qing Zhao</a>, <a href="/search/?searchtype=author&amp;query=Yang%2C+Z">Zaihong Yang</a>, <a href="/search/?searchtype=author&amp;query=Kimura%2C+M">Masaaki Kimura</a>, <a href="/search/?searchtype=author&amp;query=Zhou%2C+B">Bo Zhou</a>, <a href="/search/?searchtype=author&amp;query=Shin%2C+S">Seung-heon Shin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.15974v1-abstract-short" style="display: inline;"> The reduced width amplitudes (RWA) and the 
spectroscopic factor (S-factor) of $α$-cluster and valence neutron in $^{12}$Be are calculated by the generator coordinates method (GCM) with the cluster model. By fixing the distance between the $α$-clusters&#39; generated coordinates, we make a theoretical experiment to analyze the relationship between the $α$-clustering separation and the orbital occupatio&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15974v1-abstract-full').style.display = 'inline'; document.getElementById('2501.15974v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.15974v1-abstract-full" style="display: none;"> The reduced width amplitudes (RWA) and the spectroscopic factor (S-factor) of $α$-cluster and valence neutron in $^{12}$Be are calculated by the generator coordinates method (GCM) with the cluster model. By fixing the distance between the $α$-clusters&#39; generated coordinates, we make a theoretical experiment to analyze the relationship between the $α$-clustering separation and the orbital occupation of the valence neutron in $^{12}$Be. The analysis of the results shows that the percentage of the $σ$ orbital occupation in $^{12}$Be is positively related to the clustering separation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15974v1-abstract-full').style.display = 'none'; document.getElementById('2501.15974v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, 6 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.14785">arXiv:2501.14785</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.14785">pdf</a>, <a href="https://arxiv.org/format/2501.14785">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> </div> </div> <p class="title is-5 mathjax"> ED-Filter: Dynamic Feature Filtering for Eating Disorder Classification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Naseriparsa%2C+M">Mehdi Naseriparsa</a>, <a href="/search/?searchtype=author&amp;query=Sukunesan%2C+S">Suku Sukunesan</a>, <a href="/search/?searchtype=author&amp;query=Cai%2C+Z">Zhen Cai</a>, <a href="/search/?searchtype=author&amp;query=Alfarraj%2C+O">Osama Alfarraj</a>, <a href="/search/?searchtype=author&amp;query=Tolba%2C+A">Amr Tolba</a>, <a href="/search/?searchtype=author&amp;query=Rabooki%2C+S+F">Saba Fathi Rabooki</a>, <a href="/search/?searchtype=author&amp;query=Xia%2C+F">Feng Xia</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.14785v1-abstract-short" style="display: inline;"> Eating disorders (ED) are critical psychiatric problems that have alarmed the mental 
health community. Mental health professionals are increasingly recognizing the utility of data derived from social media platforms such as Twitter. However, high dimensionality and extensive feature sets of Twitter data present remarkable challenges for ED classification. To overcome these hurdles, we introduce a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14785v1-abstract-full').style.display = 'inline'; document.getElementById('2501.14785v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.14785v1-abstract-full" style="display: none;"> Eating disorders (ED) are critical psychiatric problems that have alarmed the mental health community. Mental health professionals are increasingly recognizing the utility of data derived from social media platforms such as Twitter. However, high dimensionality and extensive feature sets of Twitter data present remarkable challenges for ED classification. To overcome these hurdles, we introduce a novel method, an informed branch and bound search technique known as ED-Filter. This strategy significantly improves the drawbacks of conventional feature selection algorithms such as filters and wrappers. ED-Filter iteratively identifies an optimal set of promising features that maximize the eating disorder classification accuracy. In order to adapt to the dynamic nature of Twitter ED data, we enhance the ED-Filter with a hybrid greedy-based deep learning algorithm. This algorithm swiftly identifies sub-optimal features to accommodate the ever-evolving data landscape. Experimental results on Twitter eating disorder data affirm the effectiveness and efficiency of ED-Filter. The method demonstrates significant improvements in classification accuracy and proves its value in eating disorder detection on social media platforms. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14785v1-abstract-full').style.display = 'none'; document.getElementById('2501.14785v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.14026">arXiv:2501.14026</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.14026">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Astrophysics of Galaxies">astro-ph.GA</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cosmology and Nongalactic Astrophysics">astro-ph.CO</span> </div> </div> <p class="title is-5 mathjax"> Luminous Mid-IR Selected Type-2 Quasars at Cosmic Noon in SDSS Stripe82 I: Selection, Composite Photometry, and Spectral Energy Distributions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Wang%2C+B">Ben Wang</a>, <a href="/search/?searchtype=author&amp;query=Hennawi%2C+J+F">Joseph F. Hennawi</a>, <a href="/search/?searchtype=author&amp;query=Cai%2C+Z">Zheng Cai</a>, <a href="/search/?searchtype=author&amp;query=Richards%2C+G+T">Gordon T. Richards</a>, <a href="/search/?searchtype=author&amp;query=Schindler%2C+J">Jan-Torge Schindler</a>, <a href="/search/?searchtype=author&amp;query=Zakamska%2C+N+L">Nadia L. Zakamska</a>, <a href="/search/?searchtype=author&amp;query=Ishikawa%2C+Y">Yuzo Ishikawa</a>, <a href="/search/?searchtype=author&amp;query=Akins%2C+H+B">Hollis B. 
Akins</a>, <a href="/search/?searchtype=author&amp;query=Sun%2C+Z">Zechang Sun</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.14026v1-abstract-short" style="display: inline;"> We analyze 23 spectroscopically confirmed Type-2 quasars (QSOs) selected from the WISE 22$\rm μ$m band in the SDSS Stripe 82 region, focusing on their multi-band photometry and spectral energy distributions (SEDs). These objects were selected to be IR-luminous ($\rm flux_{W4} &gt; 5mJy$, i.e., $12.62 &lt; W4 &lt; 14.62 \rm\ AB \, magnitude$), optically faint ($r &gt; 23$) or with red color ($r - W4 &gt;8.38$). G&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14026v1-abstract-full').style.display = 'inline'; document.getElementById('2501.14026v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.14026v1-abstract-full" style="display: none;"> We analyze 23 spectroscopically confirmed Type-2 quasars (QSOs) selected from the WISE 22$\rm μ$m band in the SDSS Stripe 82 region, focusing on their multi-band photometry and spectral energy distributions (SEDs). These objects were selected to be IR-luminous ($\rm flux_{W4} &gt; 5mJy$, i.e., $12.62 &lt; W4 &lt; 14.62 \rm\ AB \, magnitude$), optically faint ($r &gt; 23$) or with red color ($r - W4 &gt;8.38$). Gemini/GNIRS observations were conducted for all 24 candidates, and 18/24 were also observed with Keck/LRIS. The observations confirm 23 to be real Type-2 QSOs in the redshift range $0.88 - 2.99$ (12 are at $z&gt;2$). We collect multi-band photometry and conduct SED fitting. The composite photometry probes the wavelength from 0.1$\rm μ$m to 10$\rm μ$m at the rest frame. 
The IR emission is dominated by dust torus implying an average torus luminosity for the sample of $L_{\rm torus} \sim 10^{46.84} \rm erg/s$. The origin of the rest-UV/optical light is not definitive, but we present three possible scenarios: scattered light, stellar emission, and the reddened accretion disk. Assuming an obscured:unobscured ratio of approximately 1:1, our targets have $L_{\rm bol} = 10^{46.28} \rm erg \,s^{-1} - 10^{47.49} \rm erg \,s^{-1}$ and around SMBH masses $\rm 10^{8.18} M_{\odot} - 10^{9.39} M_{\odot}$, assuming they accrete at the Eddington limit. Compared to previous Type-2 AGN SEDs, our targets have a brighter dust torus and redder optical-IR color. By comparing the SED to the results from JWST `little red dots&#39; (LRDs), we find that these IR-selected Type-2 QSOs have similar SED shapes to the LRDs. This pilot Type-2 QSO survey demonstrates that mid-IR selection is an efficient way to find luminous Type-2 QSOs at $z&gt;2$. Finally, the composite photometry and Type-2 QSOs SED model generated by this sample provide a guide for finding more Type-2 QSOs at higher redshift. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14026v1-abstract-full').style.display = 'none'; document.getElementById('2501.14026v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.13335">arXiv:2501.13335</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.13335">pdf</a>, <a href="https://arxiv.org/format/2501.13335">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Deblur-Avatar: Animatable Avatars from Motion-Blurred Monocular Videos </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Luo%2C+X">Xianrui Luo</a>, <a href="/search/?searchtype=author&amp;query=Peng%2C+J">Juewen Peng</a>, <a href="/search/?searchtype=author&amp;query=Cai%2C+Z">Zhongang Cai</a>, <a href="/search/?searchtype=author&amp;query=Yang%2C+L">Lei Yang</a>, <a href="/search/?searchtype=author&amp;query=Yang%2C+F">Fan Yang</a>, <a href="/search/?searchtype=author&amp;query=Cao%2C+Z">Zhiguo Cao</a>, <a href="/search/?searchtype=author&amp;query=Lin%2C+G">Guosheng Lin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.13335v1-abstract-short" style="display: inline;"> We introduce Deblur-Avatar, a novel framework for modeling high-fidelity, animatable 3D human avatars from motion-blurred monocular video inputs. Motion blur is prevalent in real-world dynamic video capture, especially due to human movements in 3D human avatar modeling. 
Existing methods either (1) assume sharp image inputs, failing to address the detail loss introduced by motion blur, or (2) mainl&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.13335v1-abstract-full').style.display = 'inline'; document.getElementById('2501.13335v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.13335v1-abstract-full" style="display: none;"> We introduce Deblur-Avatar, a novel framework for modeling high-fidelity, animatable 3D human avatars from motion-blurred monocular video inputs. Motion blur is prevalent in real-world dynamic video capture, especially due to human movements in 3D human avatar modeling. Existing methods either (1) assume sharp image inputs, failing to address the detail loss introduced by motion blur, or (2) mainly consider blur by camera movements, neglecting the human motion blur which is more common in animatable avatars. Our proposed approach integrates a human movement-based motion blur model into 3D Gaussian Splatting (3DGS). By explicitly modeling human motion trajectories during exposure time, we jointly optimize the trajectories and 3D Gaussians to reconstruct sharp, high-quality human avatars. We employ a pose-dependent fusion mechanism to distinguish moving body regions, optimizing both blurred and sharp areas effectively. Extensive experiments on synthetic and real-world datasets demonstrate that Deblur-Avatar significantly outperforms existing methods in rendering quality and quantitative metrics, producing sharp avatar reconstructions and enabling real-time rendering under challenging motion blur conditions. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.13335v1-abstract-full').style.display = 'none'; document.getElementById('2501.13335v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.12946">arXiv:2501.12946</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.12946">pdf</a>, <a href="https://arxiv.org/format/2501.12946">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> </div> </div> <p class="title is-5 mathjax"> Less is More: Simple yet Effective Heuristic Community Detection with Graph Convolution Network </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Wang%2C+H">Hong Wang</a>, <a href="/search/?searchtype=author&amp;query=Zhang%2C+Y">Yinglong Zhang</a>, <a href="/search/?searchtype=author&amp;query=Zhao%2C+Z">Zhangqi Zhao</a>, <a href="/search/?searchtype=author&amp;query=Cai%2C+Z">Zhicong Cai</a>, <a href="/search/?searchtype=author&amp;query=Xia%2C+X">Xuewen Xia</a>, <a href="/search/?searchtype=author&amp;query=Xu%2C+X">Xing Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.12946v1-abstract-short" style="display: inline;"> Community detection is crucial in data mining. Traditional methods primarily focus on graph structure, often neglecting the significance of attribute features. 
In contrast, deep learning-based approaches incorporate attribute features and local structural information through contrastive learning, improving detection performance. However, existing algorithms&#39; complex design and joint optimization m&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.12946v1-abstract-full').style.display = 'inline'; document.getElementById('2501.12946v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.12946v1-abstract-full" style="display: none;"> Community detection is crucial in data mining. Traditional methods primarily focus on graph structure, often neglecting the significance of attribute features. In contrast, deep learning-based approaches incorporate attribute features and local structural information through contrastive learning, improving detection performance. However, existing algorithms&#39; complex design and joint optimization make them difficult to train and reduce detection efficiency. Additionally, these methods require the number of communities to be predefined, making the results susceptible to artificial interference. To address these challenges, we propose a simple yet effective community detection algorithm that can adaptively detect communities without relying on data augmentation and contrastive optimization. The proposed algorithm first performs community pre-detection to extract global structural information adaptively. It then utilizes GCN to integrate local structures and attribute features. Subsequently, it combines global, local structures and attribute features in the feature space to discover community affiliations. Finally, a modularity maximization method is employed to optimize the communities based on these three types of information, thereby uncovering the community affiliation of each node. 
We conduct experimental comparisons across various graph datasets, evaluating the proposed algorithm against traditional methods and state-of-the-art community detection algorithms. The experimental results demonstrate that our algorithm achieves greater efficiency and accuracy in terms of both detection speed and effectiveness. The code is available at https://github.com/wuanghoong/Less-is-More.git. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.12946v1-abstract-full').style.display = 'none'; document.getElementById('2501.12946v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">19 pages, 6 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.10654">arXiv:2501.10654</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.10654">pdf</a>, <a href="https://arxiv.org/format/2501.10654">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Efficient Transmission of Radiomaps via Physics-Enhanced Semantic Communications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Zhou%2C+Y">Yueling Zhou</a>, <a href="/search/?searchtype=author&amp;query=Wijesinghe%2C+A">Achintha Wijesinghe</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+Y">Yue Wang</a>, <a href="/search/?searchtype=author&amp;query=Zhang%2C+S">Songyang Zhang</a>, 
<a href="/search/?searchtype=author&amp;query=Cai%2C+Z">Zhipeng Cai</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.10654v2-abstract-short" style="display: inline;"> Enriching information of spectrum coverage, radiomap plays an important role in many wireless communication applications, such as resource allocation and network optimization. To enable real-time, distributed spectrum management, particularly in the scenarios with unstable and dynamic environments, the efficient transmission of spectrum coverage information for radiomaps from edge devices to the c&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.10654v2-abstract-full').style.display = 'inline'; document.getElementById('2501.10654v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.10654v2-abstract-full" style="display: none;"> Enriching information of spectrum coverage, radiomap plays an important role in many wireless communication applications, such as resource allocation and network optimization. To enable real-time, distributed spectrum management, particularly in the scenarios with unstable and dynamic environments, the efficient transmission of spectrum coverage information for radiomaps from edge devices to the central server emerges as a critical problem. In this work, we propose an innovative physics-enhanced semantic communication framework tailored for efficient radiomap transmission based on generative learning models. Specifically, instead of bit-wise message passing, we only transmit the key &#34;semantics&#34; in radiomaps characterized by the radio propagation behavior and surrounding environments, where semantic compression schemes are utilized to reduce the communication overhead. 
Incorporating the novel concepts of Radio Depth Maps, the radiomaps are reconstructed from the delivered semantic information backboned on the conditional generative adversarial networks. Our framework is further extended to facilitate its implementation in the scenarios of multi-user edge computing, by integrating with federated learning for collaborative model training while preserving the data privacy. Experimental results show that our approach achieves high accuracy in radio coverage information recovery at ultra-high bandwidth efficiency, which has great potentials in many wireless-generated data transmission applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.10654v2-abstract-full').style.display = 'none'; document.getElementById('2501.10654v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear in 2025 IEEE International Conference on Communications</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.10291">arXiv:2501.10291</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.10291">pdf</a>, <a href="https://arxiv.org/format/2501.10291">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> </div> </div> <p class="title is-5 mathjax"> Low-overhead Magic State Circuits with Transversal CNOTs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Fazio%2C+N">Nicholas Fazio</a>, <a href="/search/?searchtype=author&amp;query=Webster%2C+M">Mark Webster</a>, <a href="/search/?searchtype=author&amp;query=Cai%2C+Z">Zhenyu Cai</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.10291v1-abstract-short" style="display: inline;"> With the successful demonstration of transversal CNOTs in many recent experiments, it is the right moment to examine its implications on one of the most critical parts of fault-tolerant computation -- magic state preparation. 
Using an algorithm that can recompile and simplify a circuit of consecutive multi-qubit phase rotations, we manage to construct fault-tolerant circuits for CCZ, CS and T stat&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.10291v1-abstract-full').style.display = 'inline'; document.getElementById('2501.10291v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.10291v1-abstract-full" style="display: none;"> With the successful demonstration of transversal CNOTs in many recent experiments, it is the right moment to examine its implications on one of the most critical parts of fault-tolerant computation -- magic state preparation. Using an algorithm that can recompile and simplify a circuit of consecutive multi-qubit phase rotations, we manage to construct fault-tolerant circuits for CCZ, CS and T states with minimal T-depth and also much lower CNOT depths and qubit counts than before. These circuits can play crucial roles in fault-tolerant computation with transversal CNOTs, and we hope that the algorithms and methods developed in this paper can be used to further simplify other protocols in similar contexts. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.10291v1-abstract-full').style.display = 'none'; document.getElementById('2501.10291v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.09782">arXiv:2501.09782</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.09782">pdf</a>, <a href="https://arxiv.org/format/2501.09782">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> SMPLest-X: Ultimate Scaling for Expressive Human Pose and Shape Estimation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Yin%2C+W">Wanqi Yin</a>, <a href="/search/?searchtype=author&amp;query=Cai%2C+Z">Zhongang Cai</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+R">Ruisi Wang</a>, <a href="/search/?searchtype=author&amp;query=Zeng%2C+A">Ailing Zeng</a>, <a href="/search/?searchtype=author&amp;query=Wei%2C+C">Chen Wei</a>, <a href="/search/?searchtype=author&amp;query=Sun%2C+Q">Qingping Sun</a>, <a href="/search/?searchtype=author&amp;query=Mei%2C+H">Haiyi Mei</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+Y">Yanjun Wang</a>, <a href="/search/?searchtype=author&amp;query=Pang%2C+H+E">Hui En Pang</a>, <a href="/search/?searchtype=author&amp;query=Zhang%2C+M">Mingyuan Zhang</a>, <a href="/search/?searchtype=author&amp;query=Zhang%2C+L">Lei Zhang</a>, <a href="/search/?searchtype=author&amp;query=Loy%2C+C+C">Chen Change Loy</a>, <a 
href="/search/?searchtype=author&amp;query=Yamashita%2C+A">Atsushi Yamashita</a>, <a href="/search/?searchtype=author&amp;query=Yang%2C+L">Lei Yang</a>, <a href="/search/?searchtype=author&amp;query=Liu%2C+Z">Ziwei Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.09782v1-abstract-short" style="display: inline;"> Expressive human pose and shape estimation (EHPS) unifies body, hands, and face motion capture with numerous applications. Despite encouraging progress, current state-of-the-art methods focus on training innovative architectural designs on confined datasets. In this work, we investigate the impact of scaling up EHPS towards a family of generalist foundation models. 1) For data scaling, we perform&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.09782v1-abstract-full').style.display = 'inline'; document.getElementById('2501.09782v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.09782v1-abstract-full" style="display: none;"> Expressive human pose and shape estimation (EHPS) unifies body, hands, and face motion capture with numerous applications. Despite encouraging progress, current state-of-the-art methods focus on training innovative architectural designs on confined datasets. In this work, we investigate the impact of scaling up EHPS towards a family of generalist foundation models. 1) For data scaling, we perform a systematic investigation on 40 EHPS datasets, encompassing a wide range of scenarios that a model trained on any single dataset cannot handle. More importantly, capitalizing on insights obtained from the extensive benchmarking process, we optimize our training scheme and select datasets that lead to a significant leap in EHPS capabilities. 
Ultimately, we achieve diminishing returns at 10M training instances from diverse data sources. 2) For model scaling, we take advantage of vision transformers (up to ViT-Huge as the backbone) to study the scaling law of model sizes in EHPS. To exclude the influence of algorithmic design, we base our experiments on two minimalist architectures: SMPLer-X, which consists of an intermediate step for hand and face localization, and SMPLest-X, an even simpler version that reduces the network to its bare essentials and highlights significant advances in the capture of articulated hands. With big data and the large model, the foundation models exhibit strong performance across diverse test benchmarks and excellent transferability to even unseen environments. Moreover, our finetuning strategy turns the generalist into specialist models, allowing them to achieve further performance boosts. Notably, our foundation models consistently deliver state-of-the-art results on seven benchmarks such as AGORA, UBody, EgoBody, and our proposed SynHand dataset for comprehensive hand evaluation. (Code is available at: https://github.com/wqyin/SMPLest-X). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.09782v1-abstract-full').style.display = 'none'; document.getElementById('2501.09782v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">An extension of SMPLer-X [arXiv:2309.17448]. 
Homepage: https://caizhongang.com/projects/SMPLer-X/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.09580">arXiv:2501.09580</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.09580">pdf</a>, <a href="https://arxiv.org/format/2501.09580">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="High Energy Astrophysical Phenomena">astro-ph.HE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Astrophysics of Galaxies">astro-ph.GA</span> </div> </div> <p class="title is-5 mathjax"> An Intermediate-mass Black Hole Lurking in A Galactic Halo Caught Alive during Outburst </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Jin%2C+C+-">C. -C. Jin</a>, <a href="/search/?searchtype=author&amp;query=Li%2C+D+-">D. -Y. Li</a>, <a href="/search/?searchtype=author&amp;query=Jiang%2C+N">N. Jiang</a>, <a href="/search/?searchtype=author&amp;query=Dai%2C+L+-">L. -X. Dai</a>, <a href="/search/?searchtype=author&amp;query=Cheng%2C+H+-">H. -Q. Cheng</a>, <a href="/search/?searchtype=author&amp;query=Zhu%2C+J+-">J. -Z. Zhu</a>, <a href="/search/?searchtype=author&amp;query=Yang%2C+C+-">C. -W. Yang</a>, <a href="/search/?searchtype=author&amp;query=Rau%2C+A">A. Rau</a>, <a href="/search/?searchtype=author&amp;query=Baldini%2C+P">P. Baldini</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+T+-">T. -G. Wang</a>, <a href="/search/?searchtype=author&amp;query=Zhou%2C+H+-">H. -Y. Zhou</a>, <a href="/search/?searchtype=author&amp;query=Yuan%2C+W">W. Yuan</a>, <a href="/search/?searchtype=author&amp;query=Zhang%2C+C">C. Zhang</a>, <a href="/search/?searchtype=author&amp;query=Shu%2C+X+-">X. -W. Shu</a>, <a href="/search/?searchtype=author&amp;query=Shen%2C+R+-">R. -F. 
Shen</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+Y+-">Y. -L. Wang</a>, <a href="/search/?searchtype=author&amp;query=Wen%2C+S+-">S. -X. Wen</a>, <a href="/search/?searchtype=author&amp;query=Wu%2C+Q+-">Q. -Y. Wu</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+Y+-">Y. -B. Wang</a>, <a href="/search/?searchtype=author&amp;query=Thomsen%2C+L+L">L. L. Thomsen</a>, <a href="/search/?searchtype=author&amp;query=Zhang%2C+Z+-">Z. -J. Zhang</a>, <a href="/search/?searchtype=author&amp;query=Zhang%2C+W+-">W. -J. Zhang</a>, <a href="/search/?searchtype=author&amp;query=Coleiro%2C+A">A. Coleiro</a>, <a href="/search/?searchtype=author&amp;query=Eyles-Ferris%2C+R">R. Eyles-Ferris</a>, <a href="/search/?searchtype=author&amp;query=Fang%2C+X">X. Fang</a> , et al. (116 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.09580v1-abstract-short" style="display: inline;"> Stellar-mass and supermassive black holes abound in the Universe, whereas intermediate-mass black holes (IMBHs) of ~10^2-10^5 solar masses in between are largely missing observationally, with few cases found only. 
Here we report the real-time discovery of a long-duration X-ray transient, EP240222a, accompanied by an optical flare with prominent H and He emission lines revealed by prompt follow-up&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.09580v1-abstract-full').style.display = 'inline'; document.getElementById('2501.09580v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.09580v1-abstract-full" style="display: none;"> Stellar-mass and supermassive black holes abound in the Universe, whereas intermediate-mass black holes (IMBHs) of ~10^2-10^5 solar masses in between are largely missing observationally, with few cases found only. Here we report the real-time discovery of a long-duration X-ray transient, EP240222a, accompanied by an optical flare with prominent H and He emission lines revealed by prompt follow-up observations. Its observed properties evidence an IMBH located unambiguously in the halo of a nearby galaxy and flaring by tidally disrupting a star -- the only confirmed off-nucleus IMBH-tidal disruption event so far. This work demonstrates the potential of sensitive time-domain X-ray surveys, complemented by timely multi-wavelength follow-ups, in probing IMBHs, their environments, demographics, origins and connections to stellar-mass and supermassive black holes. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.09580v1-abstract-full').style.display = 'none'; document.getElementById('2501.09580v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">64 pages, 15 figures, submitted</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.08643">arXiv:2501.08643</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.08643">pdf</a>, <a href="https://arxiv.org/format/2501.08643">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> MonSter: Marry Monodepth to Stereo Unleashes Power </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Cheng%2C+J">Junda Cheng</a>, <a href="/search/?searchtype=author&amp;query=Liu%2C+L">Longliang Liu</a>, <a href="/search/?searchtype=author&amp;query=Xu%2C+G">Gangwei Xu</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+X">Xianqi Wang</a>, <a href="/search/?searchtype=author&amp;query=Zhang%2C+Z">Zhaoxing Zhang</a>, <a href="/search/?searchtype=author&amp;query=Deng%2C+Y">Yong Deng</a>, <a href="/search/?searchtype=author&amp;query=Zang%2C+J">Jinliang Zang</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+Y">Yurui Chen</a>, <a href="/search/?searchtype=author&amp;query=Cai%2C+Z">Zhipeng Cai</a>, <a href="/search/?searchtype=author&amp;query=Yang%2C+X">Xin Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.08643v1-abstract-short" style="display: inline;"> Stereo matching recovers depth from image correspondences. Existing methods struggle to handle ill-posed regions with limited matching cues, such as occlusions and textureless areas. 
To address this, we propose MonSter, a novel method that leverages the complementary strengths of monocular depth estimation and stereo matching. MonSter integrates monocular depth and stereo matching into a dual-bran&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.08643v1-abstract-full').style.display = 'inline'; document.getElementById('2501.08643v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.08643v1-abstract-full" style="display: none;"> Stereo matching recovers depth from image correspondences. Existing methods struggle to handle ill-posed regions with limited matching cues, such as occlusions and textureless areas. To address this, we propose MonSter, a novel method that leverages the complementary strengths of monocular depth estimation and stereo matching. MonSter integrates monocular depth and stereo matching into a dual-branch architecture to iteratively improve each other. Confidence-based guidance adaptively selects reliable stereo cues for monodepth scale-shift recovery. The refined monodepth in turn guides stereo effectively at ill-posed regions. Such iterative mutual enhancement enables MonSter to evolve monodepth priors from coarse object-level structures to pixel-level geometry, fully unlocking the potential of stereo matching. As shown in Fig.1, MonSter ranks 1st across the five most commonly used leaderboards -- SceneFlow, KITTI 2012, KITTI 2015, Middlebury, and ETH3D, achieving up to 49.5% improvements (Bad 1.0 on ETH3D) over the previous best method. Comprehensive analysis verifies the effectiveness of MonSter in ill-posed regions. In terms of zero-shot generalization, MonSter significantly and consistently outperforms state-of-the-art across the board. The code is publicly available at: https://github.com/Junda24/MonSter. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.08643v1-abstract-full').style.display = 'none'; document.getElementById('2501.08643v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.06816">arXiv:2501.06816</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.06816">pdf</a>, <a href="https://arxiv.org/format/2501.06816">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Mesoscale and Nanoscale Physics">cond-mat.mes-hall</span> </div> </div> <p class="title is-5 mathjax"> Interaction-Induced Second-Order Skin Effect </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Ling%2C+W">Wen-Zheng Ling</a>, <a href="/search/?searchtype=author&amp;query=Cai%2C+Z">Zhao-Fan Cai</a>, <a href="/search/?searchtype=author&amp;query=Liu%2C+T">Tao Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.06816v1-abstract-short" style="display: inline;"> In contrast to the conventional (first-order) non-Hermitian skin effect (NHSE) in a $d$-dimensional system with linear size $L$, the $n$th-order (higher-order) NHSE is characterized by skin modes localized at lower-dimensional boundaries of dimension $(d-n)$. The total number of these modes scales linearly with the system size $L$. 
Significant progress has been made in understanding higher-order N&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.06816v1-abstract-full').style.display = 'inline'; document.getElementById('2501.06816v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.06816v1-abstract-full" style="display: none;"> In contrast to the conventional (first-order) non-Hermitian skin effect (NHSE) in a $d$-dimensional system with linear size $L$, the $n$th-order (higher-order) NHSE is characterized by skin modes localized at lower-dimensional boundaries of dimension $(d-n)$. The total number of these modes scales linearly with the system size $L$. Significant progress has been made in understanding higher-order NHSE in non-interacting systems. In this work, we demonstrate the many-body interaction induced second-order skin effect in a two-dimensional non-Hermitian bosonic system. Specifically, we construct a non-Hermitian square lattice that incorporates nonreciprocal single-boson hopping, onsite many-body interactions and two-boson pairing hopping. In the absence of interactions, no second-order NHSE is observed. However, with the inclusion of interactions, we identify interaction-induced skin modes for in-gap doublon states (i.e., bound pairs of bosons) localized at the corners of the lattice, while the bulk doublon states remain extended. These corner-localized skin modes arise from the interplay between interaction-induced edge states, localized along one-dimensional boundaries, and the nonreciprocal hopping along these boundaries. Furthermore, the number of corner skin modes scales linearly with the system size, confirming the presence of second-order NHSE in this interacting system. Our findings introduce a novel approach to realizing higher-order skin effects by leveraging interactions. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.06816v1-abstract-full').style.display = 'none'; document.getElementById('2501.06816v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">15 pages, 9 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.05694">arXiv:2501.05694</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.05694">pdf</a>, <a href="https://arxiv.org/ps/2501.05694">ps</a>, <a href="https://arxiv.org/format/2501.05694">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> </div> </div> <p class="title is-5 mathjax"> Myths around quantum computation before full fault tolerance: What no-go theorems rule out and what they don&#39;t </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Zimbor%C3%A1s%2C+Z">Zoltán Zimborás</a>, <a href="/search/?searchtype=author&amp;query=Koczor%2C+B">Bálint Koczor</a>, <a href="/search/?searchtype=author&amp;query=Holmes%2C+Z">Zoë Holmes</a>, <a href="/search/?searchtype=author&amp;query=Borrelli%2C+E">Elsi-Mari Borrelli</a>, <a href="/search/?searchtype=author&amp;query=Gily%C3%A9n%2C+A">András Gilyén</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+H">Hsin-Yuan Huang</a>, <a href="/search/?searchtype=author&amp;query=Cai%2C+Z">Zhenyu Cai</a>, <a href="/search/?searchtype=author&amp;query=Ac%C3%ADn%2C+A">Antonio 
Acín</a>, <a href="/search/?searchtype=author&amp;query=Aolita%2C+L">Leandro Aolita</a>, <a href="/search/?searchtype=author&amp;query=Banchi%2C+L">Leonardo Banchi</a>, <a href="/search/?searchtype=author&amp;query=Brand%C3%A3o%2C+F+G+S+L">Fernando G. S. L. Brandão</a>, <a href="/search/?searchtype=author&amp;query=Cavalcanti%2C+D">Daniel Cavalcanti</a>, <a href="/search/?searchtype=author&amp;query=Cubitt%2C+T">Toby Cubitt</a>, <a href="/search/?searchtype=author&amp;query=Filippov%2C+S+N">Sergey N. Filippov</a>, <a href="/search/?searchtype=author&amp;query=Garc%C3%ADa-P%C3%A9rez%2C+G">Guillermo García-Pérez</a>, <a href="/search/?searchtype=author&amp;query=Goold%2C+J">John Goold</a>, <a href="/search/?searchtype=author&amp;query=K%C3%A1lm%C3%A1n%2C+O">Orsolya Kálmán</a>, <a href="/search/?searchtype=author&amp;query=Kyoseva%2C+E">Elica Kyoseva</a>, <a href="/search/?searchtype=author&amp;query=Rossi%2C+M+A+C">Matteo A. C. Rossi</a>, <a href="/search/?searchtype=author&amp;query=Sokolov%2C+B">Boris Sokolov</a>, <a href="/search/?searchtype=author&amp;query=Tavernelli%2C+I">Ivano Tavernelli</a>, <a href="/search/?searchtype=author&amp;query=Maniscalco%2C+S">Sabrina Maniscalco</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.05694v1-abstract-short" style="display: inline;"> In this perspective article, we revisit and critically evaluate prevailing viewpoints on the capabilities and limitations of near-term quantum computing and its potential transition toward fully fault-tolerant quantum computing. 
We examine theoretical no-go results and their implications, addressing misconceptions about the practicality of quantum error mitigation techniques and variational quantu&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.05694v1-abstract-full').style.display = 'inline'; document.getElementById('2501.05694v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.05694v1-abstract-full" style="display: none;"> In this perspective article, we revisit and critically evaluate prevailing viewpoints on the capabilities and limitations of near-term quantum computing and its potential transition toward fully fault-tolerant quantum computing. We examine theoretical no-go results and their implications, addressing misconceptions about the practicality of quantum error mitigation techniques and variational quantum algorithms. By emphasizing the nuances of error scaling, circuit depth, and algorithmic feasibility, we highlight viable near-term applications and synergies between error mitigation and early fault-tolerant architectures. Our discussion explores strategies for addressing current challenges, such as barren plateaus in variational circuits and the integration of quantum error mitigation and quantum error correction techniques. We aim to underscore the importance of continued innovation in hardware and algorithmic design to bridge the gap between theoretical potential and practical utility, paving the way for meaningful quantum advantage in the era of late noisy intermediate scale and early fault-tolerant quantum devices. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.05694v1-abstract-full').style.display = 'none'; document.getElementById('2501.05694v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Comments welcome</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.02671">arXiv:2501.02671</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.02671">pdf</a>, <a href="https://arxiv.org/format/2501.02671">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3627673.3679564">10.1145/3627673.3679564 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Quantum Cognition-Inspired EEG-based Recommendation via Graph Neural Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Han%2C+J">Jinkun Han</a>, <a href="/search/?searchtype=author&amp;query=Li%2C+W">Wei Li</a>, <a href="/search/?searchtype=author&amp;query=Li%2C+Y">Yingshu Li</a>, <a href="/search/?searchtype=author&amp;query=Cai%2C+Z">Zhipeng Cai</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.02671v1-abstract-short" style="display: inline;"> Current recommendation systems recommend goods by considering users&#39; historical behaviors, social relations, ratings, and other multi-modals. Although outdated user information presents the trends of a user&#39;s interests, no recommendation system can know the users&#39; real-time thoughts indeed. With the development of brain-computer interfaces, it is time to explore next-generation recommenders that s&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.02671v1-abstract-full').style.display = 'inline'; document.getElementById('2501.02671v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.02671v1-abstract-full" style="display: none;"> Current recommendation systems recommend goods by considering users&#39; historical behaviors, social relations, ratings, and other multi-modals. Although outdated user information presents the trends of a user&#39;s interests, no recommendation system can know the users&#39; real-time thoughts indeed. With the development of brain-computer interfaces, it is time to explore next-generation recommenders that show users&#39; real-time thoughts without delay. Electroencephalography (EEG) is a promising method of collecting brain signals because of its convenience and mobility. Currently, there is only few research on EEG-based recommendations due to the complexity of learning human brain activity. To explore the utility of EEG-based recommendation, we propose a novel neural network model, QUARK, combining Quantum Cognition Theory and Graph Convolutional Networks for accurate item recommendations. Compared with the state-of-the-art recommendation models, the superiority of QUARK is confirmed via extensive experiments. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.02671v1-abstract-full').style.display = 'none'; document.getElementById('2501.02671v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> CIKM &#39;24: Proceedings of the 33rd ACM International Conference on Information and Knowledge Management, 2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.02120">arXiv:2501.02120</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.02120">pdf</a>, <a href="https://arxiv.org/format/2501.02120">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> </div> </div> <p class="title is-5 mathjax"> Snakes on a Plane: mobile, low dimensional logical qubits on a 2D surface </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Siegel%2C+A">Adam Siegel</a>, <a href="/search/?searchtype=author&amp;query=Cai%2C+Z">Zhenyu Cai</a>, <a href="/search/?searchtype=author&amp;query=Jnane%2C+H">Hamza Jnane</a>, <a href="/search/?searchtype=author&amp;query=Koczor%2C+B">Balint Koczor</a>, <a href="/search/?searchtype=author&amp;query=Pexton%2C+S">Shaun Pexton</a>, <a href="/search/?searchtype=author&amp;query=Strikis%2C+A">Armands Strikis</a>, <a href="/search/?searchtype=author&amp;query=Benjamin%2C+S">Simon Benjamin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short 
has-text-grey-dark mathjax" id="2501.02120v1-abstract-short" style="display: inline;"> Recent demonstrations indicate that silicon-spin QPUs will be able to shuttle physical qubits rapidly and with high fidelity - a desirable feature for maximising logical connectivity, supporting new codes, and routing around damage. However it may seem that shuttling at the logical level is unwise: static defects in the device may &#39;scratch&#39; a logical qubit as it passes, causing correlated errors t&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.02120v1-abstract-full').style.display = 'inline'; document.getElementById('2501.02120v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.02120v1-abstract-full" style="display: none;"> Recent demonstrations indicate that silicon-spin QPUs will be able to shuttle physical qubits rapidly and with high fidelity - a desirable feature for maximising logical connectivity, supporting new codes, and routing around damage. However it may seem that shuttling at the logical level is unwise: static defects in the device may &#39;scratch&#39; a logical qubit as it passes, causing correlated errors to which the code is highly vulnerable. Here we explore an architecture where logical qubits are 1D strings (&#39;snakes&#39;) which can be moved freely over a planar latticework. Possible scratch events are inferred via monitor qubits and the complementary gap; if deemed a risk, remarkably the shuttle process can be undone in a way that negates any corruption. Interaction between logical snakes is facilitated by a semi-transversal method. We obtain encouraging estimates for the tolerable levels of shuttling-related imperfections. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.02120v1-abstract-full').style.display = 'none'; document.getElementById('2501.02120v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">27 pages, 23 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.01101">arXiv:2501.01101</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.01101">pdf</a>, <a href="https://arxiv.org/format/2501.01101">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Deformable Gaussian Splatting for Efficient and High-Fidelity Reconstruction of Surgical Scenes </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Shan%2C+J">Jiwei Shan</a>, <a href="/search/?searchtype=author&amp;query=Cai%2C+Z">Zeyu Cai</a>, <a href="/search/?searchtype=author&amp;query=Hsieh%2C+C">Cheng-Tai Hsieh</a>, <a href="/search/?searchtype=author&amp;query=Cheng%2C+S+S">Shing Shin Cheng</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+H">Hesheng Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.01101v1-abstract-short" style="display: inline;"> Efficient and high-fidelity reconstruction of deformable surgical scenes is a 
critical yet challenging task. Building on recent advancements in 3D Gaussian splatting, current methods have seen significant improvements in both reconstruction quality and rendering speed. However, two major limitations remain: (1) difficulty in handling irreversible dynamic changes, such as tissue shearing, which are&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.01101v1-abstract-full').style.display = 'inline'; document.getElementById('2501.01101v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.01101v1-abstract-full" style="display: none;"> Efficient and high-fidelity reconstruction of deformable surgical scenes is a critical yet challenging task. Building on recent advancements in 3D Gaussian splatting, current methods have seen significant improvements in both reconstruction quality and rendering speed. However, two major limitations remain: (1) difficulty in handling irreversible dynamic changes, such as tissue shearing, which are common in surgical scenes; and (2) the lack of hierarchical modeling for surgical scene deformation, which reduces rendering speed. To address these challenges, we introduce EH-SurGS, an efficient and high-fidelity reconstruction algorithm for deformable surgical scenes. We propose a deformation modeling approach that incorporates the life cycle of 3D Gaussians, effectively capturing both regular and irreversible deformations, thus enhancing reconstruction quality. Additionally, we present an adaptive motion hierarchy strategy that distinguishes between static and deformable regions within the surgical scene. This strategy reduces the number of 3D Gaussians passing through the deformation field, thereby improving rendering speed. Extensive experiments demonstrate that our method surpasses existing state-of-the-art approaches in both reconstruction quality and rendering speed. 
Ablation studies further validate the effectiveness and necessity of our proposed components. We will open-source our code upon acceptance of the paper. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.01101v1-abstract-full').style.display = 'none'; document.getElementById('2501.01101v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">7 pages, 4 figures, submitted to ICRA 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.20299">arXiv:2412.20299</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.20299">pdf</a>, <a href="https://arxiv.org/format/2412.20299">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> No Preference Left Behind: Group Distributional Preference Optimization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Yao%2C+B">Binwei Yao</a>, <a href="/search/?searchtype=author&amp;query=Cai%2C+Z">Zefan Cai</a>, <a href="/search/?searchtype=author&amp;query=Chuang%2C+Y">Yun-Shiuan Chuang</a>, <a href="/search/?searchtype=author&amp;query=Yang%2C+S">Shanglin Yang</a>, <a href="/search/?searchtype=author&amp;query=Jiang%2C+M">Ming Jiang</a>, <a href="/search/?searchtype=author&amp;query=Yang%2C+D">Diyi Yang</a>, <a href="/search/?searchtype=author&amp;query=Hu%2C+J">Junjie Hu</a> </p> <p class="abstract 
mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.20299v1-abstract-short" style="display: inline;"> Preferences within a group of people are not uniform but follow a distribution. While existing alignment methods like Direct Preference Optimization (DPO) attempt to steer models to reflect human preferences, they struggle to capture the distributional pluralistic preferences within a group. These methods often skew toward dominant preferences, overlooking the diversity of opinions, especially whe&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.20299v1-abstract-full').style.display = 'inline'; document.getElementById('2412.20299v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.20299v1-abstract-full" style="display: none;"> Preferences within a group of people are not uniform but follow a distribution. While existing alignment methods like Direct Preference Optimization (DPO) attempt to steer models to reflect human preferences, they struggle to capture the distributional pluralistic preferences within a group. These methods often skew toward dominant preferences, overlooking the diversity of opinions, especially when conflicting preferences arise. To address this issue, we propose Group Distribution Preference Optimization (GDPO), a novel framework that aligns language models with the distribution of preferences within a group by incorporating the concept of beliefs that shape individual preferences. GDPO calibrates a language model using statistical estimation of the group&#39;s belief distribution and aligns the model with belief-conditioned preferences, offering a more inclusive alignment framework than traditional methods. 
In experiments using both synthetic controllable opinion generation and real-world movie review datasets, we show that DPO fails to align with the targeted belief distributions, while GDPO consistently reduces this alignment gap during training. Moreover, our evaluation metrics demonstrate that GDPO outperforms existing approaches in aligning with group distributional preferences, marking a significant advance in pluralistic alignment. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.20299v1-abstract-full').style.display = 'none'; document.getElementById('2412.20299v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.20070">arXiv:2412.20070</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.20070">pdf</a>, <a href="https://arxiv.org/format/2412.20070">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> On the Compositional Generalization of Multimodal LLMs for Medical Imaging </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Cai%2C+Z">Zhenyang Cai</a>, <a 
href="/search/?searchtype=author&amp;query=Chen%2C+J">Junying Chen</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+R">Rongsheng Wang</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+W">Weihong Wang</a>, <a href="/search/?searchtype=author&amp;query=Deng%2C+Y">Yonglin Deng</a>, <a href="/search/?searchtype=author&amp;query=Song%2C+D">Dingjie Song</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+Y">Yize Chen</a>, <a href="/search/?searchtype=author&amp;query=Zhang%2C+Z">Zixu Zhang</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+B">Benyou Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.20070v1-abstract-short" style="display: inline;"> Multimodal large language models (MLLMs) hold significant potential in the medical field, but their capabilities are often limited by insufficient data in certain medical domains, highlighting the need for understanding what kinds of images can be used by MLLMs for generalization. Current research suggests that multi-task training outperforms single-task as different tasks can benefit each other,&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.20070v1-abstract-full').style.display = 'inline'; document.getElementById('2412.20070v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.20070v1-abstract-full" style="display: none;"> Multimodal large language models (MLLMs) hold significant potential in the medical field, but their capabilities are often limited by insufficient data in certain medical domains, highlighting the need for understanding what kinds of images can be used by MLLMs for generalization. 
Current research suggests that multi-task training outperforms single-task as different tasks can benefit each other, but they often overlook the internal relationships within these tasks, providing limited guidance on selecting datasets to enhance specific tasks. To analyze this phenomenon, we attempted to employ compositional generalization (CG)-the ability of models to understand novel combinations by recombining learned elements-as a guiding framework. Since medical images can be precisely defined by Modality, Anatomical area, and Task, naturally providing an environment for exploring CG. Therefore, we assembled 106 medical datasets to create Med-MAT for comprehensive experiments. The experiments confirmed that MLLMs can use CG to understand unseen medical images and identified CG as one of the main drivers of the generalization observed in multi-task training. Additionally, further studies demonstrated that CG effectively supports datasets with limited data and delivers consistent performance across different backbones, highlighting its versatility and broad applicability. Med-MAT is publicly available at https://github.com/FreedomIntelligence/Med-MAT. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.20070v1-abstract-full').style.display = 'none'; document.getElementById('2412.20070v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.18925">arXiv:2412.18925</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.18925">pdf</a>, <a href="https://arxiv.org/format/2412.18925">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> HuatuoGPT-o1, Towards Medical Complex Reasoning with LLMs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Chen%2C+J">Junying Chen</a>, <a href="/search/?searchtype=author&amp;query=Cai%2C+Z">Zhenyang Cai</a>, <a href="/search/?searchtype=author&amp;query=Ji%2C+K">Ke Ji</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+X">Xidong Wang</a>, <a href="/search/?searchtype=author&amp;query=Liu%2C+W">Wanlong Liu</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+R">Rongsheng Wang</a>, <a href="/search/?searchtype=author&amp;query=Hou%2C+J">Jianye Hou</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+B">Benyou Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.18925v1-abstract-short" style="display: inline;"> The breakthrough of OpenAI o1 highlights the potential of enhancing reasoning to improve LLM. Yet, most research in reasoning has focused on mathematical tasks, leaving domains like medicine underexplored. 
The medical domain, though distinct from mathematics, also demands robust reasoning to provide reliable answers, given the high standards of healthcare. However, verifying medical reasoning is c&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.18925v1-abstract-full').style.display = 'inline'; document.getElementById('2412.18925v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.18925v1-abstract-full" style="display: none;"> The breakthrough of OpenAI o1 highlights the potential of enhancing reasoning to improve LLM. Yet, most research in reasoning has focused on mathematical tasks, leaving domains like medicine underexplored. The medical domain, though distinct from mathematics, also demands robust reasoning to provide reliable answers, given the high standards of healthcare. However, verifying medical reasoning is challenging, unlike those in mathematics. To address this, we propose verifiable medical problems with a medical verifier to check the correctness of model outputs. This verifiable nature enables advancements in medical reasoning through a two-stage approach: (1) using the verifier to guide the search for a complex reasoning trajectory for fine-tuning LLMs, (2) applying reinforcement learning (RL) with verifier-based rewards to enhance complex reasoning further. Finally, we introduce HuatuoGPT-o1, a medical LLM capable of complex reasoning, which outperforms general and medical-specific baselines using only 40K verifiable problems. Experiments show complex reasoning improves medical problem-solving and benefits more from RL. We hope our approach inspires advancements in reasoning across medical and other specialized domains. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.18925v1-abstract-full').style.display = 'none'; document.getElementById('2412.18925v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.18619">arXiv:2412.18619</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.18619">pdf</a>, <a href="https://arxiv.org/format/2412.18619">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Next Token Prediction Towards Multimodal Intelligence: A Comprehensive Survey </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Chen%2C+L">Liang Chen</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+Z">Zekun Wang</a>, <a href="/search/?searchtype=author&amp;query=Ren%2C+S">Shuhuai Ren</a>, <a href="/search/?searchtype=author&amp;query=Li%2C+L">Lei Li</a>, <a 
href="/search/?searchtype=author&amp;query=Zhao%2C+H">Haozhe Zhao</a>, <a href="/search/?searchtype=author&amp;query=Li%2C+Y">Yunshui Li</a>, <a href="/search/?searchtype=author&amp;query=Cai%2C+Z">Zefan Cai</a>, <a href="/search/?searchtype=author&amp;query=Guo%2C+H">Hongcheng Guo</a>, <a href="/search/?searchtype=author&amp;query=Zhang%2C+L">Lei Zhang</a>, <a href="/search/?searchtype=author&amp;query=Xiong%2C+Y">Yizhe Xiong</a>, <a href="/search/?searchtype=author&amp;query=Zhang%2C+Y">Yichi Zhang</a>, <a href="/search/?searchtype=author&amp;query=Wu%2C+R">Ruoyu Wu</a>, <a href="/search/?searchtype=author&amp;query=Dong%2C+Q">Qingxiu Dong</a>, <a href="/search/?searchtype=author&amp;query=Zhang%2C+G">Ge Zhang</a>, <a href="/search/?searchtype=author&amp;query=Yang%2C+J">Jian Yang</a>, <a href="/search/?searchtype=author&amp;query=Meng%2C+L">Lingwei Meng</a>, <a href="/search/?searchtype=author&amp;query=Hu%2C+S">Shujie Hu</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+Y">Yulong Chen</a>, <a href="/search/?searchtype=author&amp;query=Lin%2C+J">Junyang Lin</a>, <a href="/search/?searchtype=author&amp;query=Bai%2C+S">Shuai Bai</a>, <a href="/search/?searchtype=author&amp;query=Vlachos%2C+A">Andreas Vlachos</a>, <a href="/search/?searchtype=author&amp;query=Tan%2C+X">Xu Tan</a>, <a href="/search/?searchtype=author&amp;query=Zhang%2C+M">Minjia Zhang</a>, <a href="/search/?searchtype=author&amp;query=Xiao%2C+W">Wen Xiao</a>, <a href="/search/?searchtype=author&amp;query=Yee%2C+A">Aaron Yee</a> , et al. 
(2 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.18619v2-abstract-short" style="display: inline;"> Building on the foundations of language modeling in natural language processing, Next Token Prediction (NTP) has evolved into a versatile training objective for machine learning tasks across various modalities, achieving considerable success. As Large Language Models (LLMs) have advanced to unify understanding and generation tasks within the textual modality, recent research has shown that tasks f&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.18619v2-abstract-full').style.display = 'inline'; document.getElementById('2412.18619v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.18619v2-abstract-full" style="display: none;"> Building on the foundations of language modeling in natural language processing, Next Token Prediction (NTP) has evolved into a versatile training objective for machine learning tasks across various modalities, achieving considerable success. As Large Language Models (LLMs) have advanced to unify understanding and generation tasks within the textual modality, recent research has shown that tasks from different modalities can also be effectively encapsulated within the NTP framework, transforming the multimodal information into tokens and predict the next one given the context. This survey introduces a comprehensive taxonomy that unifies both understanding and generation within multimodal learning through the lens of NTP. The proposed taxonomy covers five key aspects: Multimodal tokenization, MMNTP model architectures, unified task representation, datasets \&amp; evaluation, and open challenges. 
This new taxonomy aims to aid researchers in their exploration of multimodal intelligence. An associated GitHub repository collecting the latest papers and repos is available at https://github.com/LMM101/Awesome-Multimodal-Next-Token-Prediction <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.18619v2-abstract-full').style.display = 'none'; document.getElementById('2412.18619v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">69 pages, 18 figures, repo at https://github.com/LMM101/Awesome-Multimodal-Next-Token-Prediction</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.17573">arXiv:2412.17573</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.17573">pdf</a>, <a href="https://arxiv.org/format/2412.17573">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> URoadNet: Dual Sparse Attentive U-Net for Multiscale Road Network Extraction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Song%2C+J">Jie Song</a>, <a href="/search/?searchtype=author&amp;query=Sun%2C+Y">Yue Sun</a>, <a href="/search/?searchtype=author&amp;query=Cai%2C+Z">Ziyun Cai</a>, <a href="/search/?searchtype=author&amp;query=Xiao%2C+L">Liang
Xiao</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+Y">Yawen Huang</a>, <a href="/search/?searchtype=author&amp;query=Zheng%2C+Y">Yefeng Zheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.17573v1-abstract-short" style="display: inline;"> The challenges of road network segmentation demand an algorithm capable of adapting to the sparse and irregular shapes, as well as the diverse context, which often leads traditional encoding-decoding methods and simple Transformer embeddings to failure. We introduce a computationally efficient and powerful framework for elegant road-aware segmentation. Our method, called URoadNet, effectively enco&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.17573v1-abstract-full').style.display = 'inline'; document.getElementById('2412.17573v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.17573v1-abstract-full" style="display: none;"> The challenges of road network segmentation demand an algorithm capable of adapting to the sparse and irregular shapes, as well as the diverse context, which often leads traditional encoding-decoding methods and simple Transformer embeddings to failure. We introduce a computationally efficient and powerful framework for elegant road-aware segmentation. Our method, called URoadNet, effectively encodes fine-grained local road connectivity and holistic global topological semantics while decoding multiscale road network information. URoadNet offers a novel alternative to the U-Net architecture by integrating connectivity attention, which can exploit intra-road interactions across multi-level sampling features with reduced computational complexity. 
This local interaction serves as valuable prior information for learning global interactions between road networks and the background through another integrality attention mechanism. The two forms of sparse attention are arranged alternatively and complementarily, and trained jointly, resulting in performance improvements without significant increases in computational complexity. Extensive experiments on various datasets with different resolutions, including Massachusetts, DeepGlobe, SpaceNet, and Large-Scale remote sensing images, demonstrate that URoadNet outperforms state-of-the-art techniques. Our approach represents a significant advancement in the field of road network extraction, providing a computationally feasible solution that achieves high-quality segmentation results. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.17573v1-abstract-full').style.display = 'none'; document.getElementById('2412.17573v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 12 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.16684">arXiv:2412.16684</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.16684">pdf</a>, <a href="https://arxiv.org/format/2412.16684">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Methodology">stat.ME</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Statistics Theory">math.ST</span> </div> </div> <p class="title is-5 mathjax"> MATES: Multi-view Aggregated Two-Sample Test </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Cai%2C+Z">Zexi Cai</a>, <a href="/search/?searchtype=author&amp;query=Fei%2C+W">Wenbo Fei</a>, <a href="/search/?searchtype=author&amp;query=Zhou%2C+D">Doudou Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.16684v1-abstract-short" style="display: inline;"> The two-sample test is a fundamental problem in statistics with a wide range of applications. In the realm of high-dimensional data, nonparametric methods have gained prominence due to their flexibility and minimal distributional assumptions. However, many existing methods tend to be more effective when the two distributions differ primarily in their first and/or second moments. 
In many real-world&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.16684v1-abstract-full').style.display = 'inline'; document.getElementById('2412.16684v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.16684v1-abstract-full" style="display: none;"> The two-sample test is a fundamental problem in statistics with a wide range of applications. In the realm of high-dimensional data, nonparametric methods have gained prominence due to their flexibility and minimal distributional assumptions. However, many existing methods tend to be more effective when the two distributions differ primarily in their first and/or second moments. In many real-world scenarios, distributional differences may arise in higher-order moments, rendering traditional methods less powerful. To address this limitation, we propose a novel framework to aggregate information from multiple moments to build a test statistic. Each moment is regarded as one view of the data and contributes to the detection of some specific type of discrepancy, thus allowing the test statistic to capture more complex distributional differences. The novel multi-view aggregated two-sample test (MATES) leverages a graph-based approach, where the test statistic is constructed from the weighted similarity graphs of the pooled sample. Under mild conditions on the multi-view weighted similarity graphs, we establish theoretical properties of MATES, including a distribution-free limiting distribution under the null hypothesis, which enables straightforward type-I error control. Extensive simulation studies demonstrate that MATES effectively distinguishes subtle differences between distributions. We further validate the method on the S&amp;P100 data, showcasing its power in detecting complex distributional variations. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.16684v1-abstract-full').style.display = 'none'; document.getElementById('2412.16684v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.13612">arXiv:2412.13612</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.13612">pdf</a>, <a href="https://arxiv.org/format/2412.13612">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Are LLMs Good Literature Review Writers? Evaluating the Literature Review Writing Ability of Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Tang%2C+X">Xuemei Tang</a>, <a href="/search/?searchtype=author&amp;query=Duan%2C+X">Xufeng Duan</a>, <a href="/search/?searchtype=author&amp;query=Cai%2C+Z+G">Zhenguang G. Cai</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.13612v2-abstract-short" style="display: inline;"> The literature review is a crucial form of academic writing that involves complex processes of literature collection, organization, and summarization. The emergence of large language models (LLMs) has introduced promising tools to automate these processes. 
However, their actual capabilities in writing comprehensive literature reviews remain underexplored, such as whether they can generate accurate&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.13612v2-abstract-full').style.display = 'inline'; document.getElementById('2412.13612v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.13612v2-abstract-full" style="display: none;"> The literature review is a crucial form of academic writing that involves complex processes of literature collection, organization, and summarization. The emergence of large language models (LLMs) has introduced promising tools to automate these processes. However, their actual capabilities in writing comprehensive literature reviews remain underexplored, such as whether they can generate accurate and reliable references. To address this gap, we propose a framework to assess the literature review writing ability of LLMs automatically. We evaluate the performance of LLMs across three tasks: generating references, writing abstracts, and writing literature reviews. We employ external tools for a multidimensional evaluation, which includes assessing hallucination rates in references, semantic coverage, and factual consistency with human-written context. By analyzing the experimental results, we find that, despite advancements, even the most sophisticated models still cannot avoid generating hallucinated references. Additionally, different models exhibit varying performance in literature review writing across different disciplines. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.13612v2-abstract-full').style.display = 'none'; document.getElementById('2412.13612v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 5 figures, 5 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.13452">arXiv:2412.13452</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.13452">pdf</a>, <a href="https://arxiv.org/format/2412.13452">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> ConDo: Continual Domain Expansion for Absolute Pose Regression </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Li%2C+Z">Zijun Li</a>, <a href="/search/?searchtype=author&amp;query=Cai%2C+Z">Zhipeng Cai</a>, <a href="/search/?searchtype=author&amp;query=Yang%2C+B">Bochun Yang</a>, <a href="/search/?searchtype=author&amp;query=Shen%2C+X">Xuelun Shen</a>, <a href="/search/?searchtype=author&amp;query=Shen%2C+S">Siqi Shen</a>, <a href="/search/?searchtype=author&amp;query=Fan%2C+X">Xiaoliang Fan</a>, <a 
href="/search/?searchtype=author&amp;query=Paulitsch%2C+M">Michael Paulitsch</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+C">Cheng Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.13452v1-abstract-short" style="display: inline;"> Visual localization is a fundamental machine learning problem. Absolute Pose Regression (APR) trains a scene-dependent model to efficiently map an input image to the camera pose in a pre-defined scene. However, many applications have continually changing environments, where inference data at novel poses or scene conditions (weather, geometry) appear after deployment. Training APR on a fixed datase&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.13452v1-abstract-full').style.display = 'inline'; document.getElementById('2412.13452v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.13452v1-abstract-full" style="display: none;"> Visual localization is a fundamental machine learning problem. Absolute Pose Regression (APR) trains a scene-dependent model to efficiently map an input image to the camera pose in a pre-defined scene. However, many applications have continually changing environments, where inference data at novel poses or scene conditions (weather, geometry) appear after deployment. Training APR on a fixed dataset leads to overfitting, making it fail catastrophically on challenging novel data. This work proposes Continual Domain Expansion (ConDo), which continually collects unlabeled inference data to update the deployed APR. Instead of applying standard unsupervised domain adaptation methods which are ineffective for APR, ConDo effectively learns from unlabeled data by distilling knowledge from scene-agnostic localization methods. 
By sampling data uniformly from historical and newly collected data, ConDo can effectively expand the generalization domain of APR. Large-scale benchmarks with various scene types are constructed to evaluate models under practical (long-term) data changes. ConDo consistently and significantly outperforms baselines across architectures, scene types, and data changes. On challenging scenes (Fig.1), it reduces the localization error by &gt;7x (14.8m vs 1.7m). Analysis shows the robustness of ConDo against compute budgets, replay buffer sizes and teacher prediction noise. Comparing to model re-training, ConDo achieves similar performance up to 25x faster. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.13452v1-abstract-full').style.display = 'none'; document.getElementById('2412.13452v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">AAAI2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.12529">arXiv:2412.12529</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.12529">pdf</a>, <a href="https://arxiv.org/format/2412.12529">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> </div> </div> <p class="title is-5 mathjax"> Fault-tolerant Quantum Computation without Distillation on a 2D Device </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Scruby%2C+T+R">Thomas R. 
Scruby</a>, <a href="/search/?searchtype=author&amp;query=Nemoto%2C+K">Kae Nemoto</a>, <a href="/search/?searchtype=author&amp;query=Cai%2C+Z">Zhenyu Cai</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.12529v2-abstract-short" style="display: inline;"> We show how looped pipeline architectures - which use short-range shuttling of physical qubits to achieve a finite amount of non-local connectivity - can be used to efficiently implement the fault-tolerant non-Clifford gate between 2D surface codes described in (Sci. Adv. 6, eaay4929 (2020)). The shuttling schedule needed to implement this gate is only marginally more complex than is required for&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.12529v2-abstract-full').style.display = 'inline'; document.getElementById('2412.12529v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.12529v2-abstract-full" style="display: none;"> We show how looped pipeline architectures - which use short-range shuttling of physical qubits to achieve a finite amount of non-local connectivity - can be used to efficiently implement the fault-tolerant non-Clifford gate between 2D surface codes described in (Sci. Adv. 6, eaay4929 (2020)). The shuttling schedule needed to implement this gate is only marginally more complex than is required for implementing the standard 2D surface code in this architecture. We compare the resource cost of this operation with the cost of magic state distillation and find that, at present, this comparison is heavily in favour of distillation. 
The high cost of the non-Clifford gate is almost entirely due to the relatively low performance of the just-in-time decoder used as part of this process, which necessitates very large code distances in order to achieve suitably low logical error rates. We argue that, as very little attention has previously been given to the study and optimisation of these decoders, there are potentially significant improvements to be made in this area. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.12529v2-abstract-full').style.display = 'none'; document.getElementById('2412.12529v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages, 13 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.11586">arXiv:2412.11586</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.11586">pdf</a>, <a href="https://arxiv.org/format/2412.11586">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> StrandHead: Text to Strand-Disentangled 3D Head Avatars Using Hair Geometric Priors </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Sun%2C+X">Xiaokun Sun</a>, <a href="/search/?searchtype=author&amp;query=Cai%2C+Z">Zeyu Cai</a>, <a href="/search/?searchtype=author&amp;query=Tai%2C+Y">Ying Tai</a>, <a href="/search/?searchtype=author&amp;query=Yang%2C+J">Jian Yang</a>, <a href="/search/?searchtype=author&amp;query=Zhang%2C+Z">Zhenyu Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.11586v2-abstract-short" style="display: inline;"> While haircut indicates distinct personality, existing avatar generation methods fail to model practical hair due to the general or entangled representation. We propose StrandHead, a novel text to 3D head avatar generation method capable of generating disentangled 3D hair with strand representation. 
Without using 3D data for supervision, we demonstrate that realistic hair strands can be generated&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.11586v2-abstract-full').style.display = 'inline'; document.getElementById('2412.11586v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.11586v2-abstract-full" style="display: none;"> While haircut indicates distinct personality, existing avatar generation methods fail to model practical hair due to the general or entangled representation. We propose StrandHead, a novel text to 3D head avatar generation method capable of generating disentangled 3D hair with strand representation. Without using 3D data for supervision, we demonstrate that realistic hair strands can be generated from prompts by distilling 2D generative diffusion models. To this end, we propose a series of reliable priors on shape initialization, geometric primitives, and statistical haircut features, leading to a stable optimization and text-aligned performance. Extensive experiments show that StrandHead achieves the state-of-the-art reality and diversity of generated 3D head and hair. The generated 3D hair can also be easily implemented in the Unreal Engine for physical simulation and other applications. The code will be available at https://xiaokunsun.github.io/StrandHead.github.io. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.11586v2-abstract-full').style.display = 'none'; document.getElementById('2412.11586v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Project page: https://xiaokunsun.github.io/StrandHead.github.io</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Cai%2C+Z&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Cai%2C+Z&amp;start=0" class="pagination-link is-current" aria-label="Page 1" aria-current="page">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Cai%2C+Z&amp;start=50" class="pagination-link " aria-label="Page 2">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Cai%2C+Z&amp;start=100" class="pagination-link " aria-label="Page 3">3 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Cai%2C+Z&amp;start=150" class="pagination-link " aria-label="Page 4">4 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Cai%2C+Z&amp;start=200" class="pagination-link " aria-label="Page 5">5 </a> </li> <li><span class="pagination-ellipsis">&hellip;</span></li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only
--> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a 
href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 
0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

Pages: 1 2 3 4 5 6 7 8 9 10