CINXE.COM
Search | arXiv e-print repository
<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 2,405 results for author: <span class="mathjax">Chen, D</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <form method="GET" action="/search/" aria-role="search"> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Chen, D"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Chen%2C+D&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Chen, D"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Chen%2C+D&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Chen%2C+D&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Chen%2C+D&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Chen%2C+D&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Chen%2C+D&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Chen%2C+D&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">…</span></li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.17760">arXiv:2502.17760</a> <span> [<a href="https://arxiv.org/pdf/2502.17760">pdf</a>, <a href="https://arxiv.org/ps/2502.17760">ps</a>, <a href="https://arxiv.org/format/2502.17760">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Functional Analysis">math.FA</span> </div> </div> <p class="title is-5 mathjax"> Probabilistic Dual Frames and Minimization of Dual Frame Potentials </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Chen%2C+D">Dongwei Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.17760v1-abstract-short" style="display: inline;"> This paper studies probabilistic dual frames and associated dual frame potentials from the optimal mass transport perspective. The main contribution in this work shows that given a probabilistic frame, its dual frame potential is minimized if and only if the probabilistic frame is tight and the probabilistic dual frame is the canonical dual. In particular, the tightness condition can be dropped if… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.17760v1-abstract-full').style.display = 'inline'; document.getElementById('2502.17760v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.17760v1-abstract-full" style="display: none;"> This paper studies probabilistic dual frames and associated dual frame potentials from the optimal mass transport perspective. The main contribution in this work shows that given a probabilistic frame, its dual frame potential is minimized if and only if the probabilistic frame is tight and the probabilistic dual frame is the canonical dual. In particular, the tightness condition can be dropped if the probabilistic dual frame potential is minimized only among probabilistic dual frames of pushforward type. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.17760v1-abstract-full').style.display = 'none'; document.getElementById('2502.17760v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">19 pages</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 42C15 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.17537">arXiv:2502.17537</a> <span> [<a href="https://arxiv.org/pdf/2502.17537">pdf</a>, <a href="https://arxiv.org/format/2502.17537">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> On the Vulnerability of Concept Erasure in Diffusion Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Beerens%2C+L">Lucas Beerens</a>, <a href="/search/?searchtype=author&query=Richardson%2C+A+D">Alex D. Richardson</a>, <a href="/search/?searchtype=author&query=Zhang%2C+K">Kaicheng Zhang</a>, <a href="/search/?searchtype=author&query=Chen%2C+D">Dongdong Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.17537v1-abstract-short" style="display: inline;"> The proliferation of text-to-image diffusion models has raised significant privacy and security concerns, particularly regarding the generation of copyrighted or harmful images. To address these issues, research on machine unlearning has developed various concept erasure methods, which aim to remove the effect of unwanted data through post-hoc training. However, we show these erasure techniques ar… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.17537v1-abstract-full').style.display = 'inline'; document.getElementById('2502.17537v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.17537v1-abstract-full" style="display: none;"> The proliferation of text-to-image diffusion models has raised significant privacy and security concerns, particularly regarding the generation of copyrighted or harmful images. To address these issues, research on machine unlearning has developed various concept erasure methods, which aim to remove the effect of unwanted data through post-hoc training. However, we show these erasure techniques are vulnerable, where images of supposedly erased concepts can still be generated using adversarially crafted prompts. We introduce RECORD, a coordinate-descent-based algorithm that discovers prompts capable of eliciting the generation of erased content. We demonstrate that RECORD significantly beats the attack success rate of current state-of-the-art attack methods. Furthermore, our findings reveal that models subjected to concept erasure are more susceptible to adversarial attacks than previously anticipated, highlighting the urgency for more robust unlearning approaches. We open source all our code at https://github.com/LucasBeerens/RECORD <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.17537v1-abstract-full').style.display = 'none'; document.getElementById('2502.17537v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.16645">arXiv:2502.16645</a> <span> [<a href="https://arxiv.org/pdf/2502.16645">pdf</a>, <a href="https://arxiv.org/format/2502.16645">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> CODESYNC: Synchronizing Large Language Models with Dynamic Code Evolution at Scale </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Wang%2C+C">Chenlong Wang</a>, <a href="/search/?searchtype=author&query=Chu%2C+Z">Zhaoyang Chu</a>, <a href="/search/?searchtype=author&query=Cheng%2C+Z">Zhengxiang Cheng</a>, <a href="/search/?searchtype=author&query=Yang%2C+X">Xuyi Yang</a>, <a href="/search/?searchtype=author&query=Qiu%2C+K">Kaiyue Qiu</a>, <a href="/search/?searchtype=author&query=Wan%2C+Y">Yao Wan</a>, <a href="/search/?searchtype=author&query=Zhao%2C+Z">Zhou Zhao</a>, <a href="/search/?searchtype=author&query=Shi%2C+X">Xuanhua Shi</a>, <a href="/search/?searchtype=author&query=Chen%2C+D">Dongping Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.16645v1-abstract-short" style="display: inline;"> Large Language Models (LLMs) have exhibited exceptional performance in software engineering yet face challenges in adapting to continually evolving code knowledge, particularly regarding the frequent updates of third-party library APIs. This limitation, stemming from static pre-training datasets, often results in non-executable code or implementations with suboptimal safety and efficiency. To this… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.16645v1-abstract-full').style.display = 'inline'; document.getElementById('2502.16645v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.16645v1-abstract-full" style="display: none;"> Large Language Models (LLMs) have exhibited exceptional performance in software engineering yet face challenges in adapting to continually evolving code knowledge, particularly regarding the frequent updates of third-party library APIs. This limitation, stemming from static pre-training datasets, often results in non-executable code or implementations with suboptimal safety and efficiency. To this end, this paper introduces CODESYNC, a data engine for identifying outdated code patterns and collecting real-time code knowledge updates from Python third-party libraries. Building upon CODESYNC, we develop CODESYNCBENCH, a comprehensive benchmark for assessing LLMs' ability to stay synchronized with code evolution, which covers real-world updates for 220 APIs from six Python libraries. Our benchmark offers 3,300 test cases across three evaluation tasks and an update-aware instruction tuning dataset consisting of 2,200 training samples. Extensive experiments on 14 state-of-the-art LLMs reveal that they struggle with dynamic code evolution, even with the support of advanced knowledge updating methods (e.g., DPO, ORPO, and SimPO). We believe that our benchmark can offer a strong foundation for the development of more effective methods for real-time code knowledge updating in the future. The experimental code and dataset are publicly available at: https://github.com/Lucky-voyage/Code-Sync. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.16645v1-abstract-full').style.display = 'none'; document.getElementById('2502.16645v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.16210">arXiv:2502.16210</a> <span> [<a href="https://arxiv.org/pdf/2502.16210">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1016/j.compenvurbsys.2025.102267">10.1016/j.compenvurbsys.2025.102267 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Interpreting core forms of urban morphology linked to urban functions with explainable graph neural network </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Chen%2C+D">Dongsheng Chen</a>, <a href="/search/?searchtype=author&query=Feng%2C+Y">Yu Feng</a>, <a href="/search/?searchtype=author&query=Li%2C+X">Xun Li</a>, <a href="/search/?searchtype=author&query=Qu%2C+M">Mingya Qu</a>, <a href="/search/?searchtype=author&query=Luo%2C+P">Peng Luo</a>, <a href="/search/?searchtype=author&query=Meng%2C+L">Liqiu Meng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.16210v1-abstract-short" style="display: inline;"> Understanding the high-order relationship between urban form and function is essential for modeling the underlying mechanisms of sustainable urban systems. Nevertheless, it is challenging to establish an accurate data representation for complex urban forms that are readily explicable in human terms. This study proposed the concept of core urban morphology representation and developed an explainabl… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.16210v1-abstract-full').style.display = 'inline'; document.getElementById('2502.16210v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.16210v1-abstract-full" style="display: none;"> Understanding the high-order relationship between urban form and function is essential for modeling the underlying mechanisms of sustainable urban systems. Nevertheless, it is challenging to establish an accurate data representation for complex urban forms that are readily explicable in human terms. This study proposed the concept of core urban morphology representation and developed an explainable deep learning framework for explicably symbolizing complex urban forms into the novel representation, which we call CoMo. By interpretating the well-trained deep learning model with a stable weighted F1-score of 89.14%, CoMo presents a promising approach for revealing links between urban function and urban form in terms of core urban morphology representation. Using Boston as a study area, we analyzed the core urban forms at the individual-building, block, and neighborhood level that are important to corresponding urban functions. The residential core forms follow a gradual morphological pattern along the urban spine, which is consistent with a center-urban-suburban transition. Furthermore, we prove that urban morphology directly affects land use efficiency, which has a significantly strong correlation with the location (R2=0.721, p<0.001). Overall, CoMo can explicably symbolize urban forms, provide evidence for the classic urban location theory, and offer mechanistic insights for digital twins. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.16210v1-abstract-full').style.display = 'none'; document.getElementById('2502.16210v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.14880">arXiv:2502.14880</a> <span> [<a href="https://arxiv.org/pdf/2502.14880">pdf</a>, <a href="https://arxiv.org/format/2502.14880">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> KKA: Improving Vision Anomaly Detection through Anomaly-related Knowledge from Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Chen%2C+D">Dong Chen</a>, <a href="/search/?searchtype=author&query=Hu%2C+Z">Zhengqing Hu</a>, <a href="/search/?searchtype=author&query=Fan%2C+P">Peiguang Fan</a>, <a href="/search/?searchtype=author&query=Zhuang%2C+Y">Yueting Zhuang</a>, <a href="/search/?searchtype=author&query=Li%2C+Y">Yafei Li</a>, <a href="/search/?searchtype=author&query=Liu%2C+Q">Qidong Liu</a>, <a href="/search/?searchtype=author&query=Jiang%2C+X">Xiaoheng Jiang</a>, <a href="/search/?searchtype=author&query=Xu%2C+M">Mingliang Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.14880v1-abstract-short" style="display: inline;"> Vision anomaly detection, particularly in unsupervised settings, often struggles to distinguish between normal samples and anomalies due to the wide variability in anomalies. Recently, an increasing number of studies have focused on generating anomalies to help detectors learn more effective boundaries between normal samples and anomalies. However, as the generated anomalies are often derived from… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.14880v1-abstract-full').style.display = 'inline'; document.getElementById('2502.14880v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.14880v1-abstract-full" style="display: none;"> Vision anomaly detection, particularly in unsupervised settings, often struggles to distinguish between normal samples and anomalies due to the wide variability in anomalies. Recently, an increasing number of studies have focused on generating anomalies to help detectors learn more effective boundaries between normal samples and anomalies. However, as the generated anomalies are often derived from random factors, they frequently lack realism. Additionally, randomly generated anomalies typically offer limited support in constructing effective boundaries, as most differ substantially from normal samples and lie far from the boundary. To address these challenges, we propose Key Knowledge Augmentation (KKA), a method that extracts anomaly-related knowledge from large language models (LLMs). More specifically, KKA leverages the extensive prior knowledge of LLMs to generate meaningful anomalies based on normal samples. Then, KKA classifies the generated anomalies as easy anomalies and hard anomalies according to their similarity to normal samples. Easy anomalies exhibit significant differences from normal samples, whereas hard anomalies closely resemble normal samples. KKA iteratively updates the generated anomalies, and gradually increasing the proportion of hard anomalies to enable the detector to learn a more effective boundary. Experimental results show that the proposed method significantly improves the performance of various vision anomaly detectors while maintaining low generation costs. The code for CMG can be found at https://github.com/Anfeather/KKA. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.14880v1-abstract-full').style.display = 'none'; document.getElementById('2502.14880v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.14352">arXiv:2502.14352</a> <span> [<a href="https://arxiv.org/pdf/2502.14352">pdf</a>, <a href="https://arxiv.org/format/2502.14352">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> SR-LLM: Rethinking the Structured Representation in Large Language Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Zhang%2C+J">Jiahuan Zhang</a>, <a href="/search/?searchtype=author&query=Wang%2C+T">Tianheng Wang</a>, <a href="/search/?searchtype=author&query=Wu%2C+H">Hanqing Wu</a>, <a href="/search/?searchtype=author&query=Huang%2C+Z">Ziyi Huang</a>, <a href="/search/?searchtype=author&query=Wu%2C+Y">Yulong Wu</a>, <a href="/search/?searchtype=author&query=Chen%2C+D">Dongbai Chen</a>, <a href="/search/?searchtype=author&query=Song%2C+L">Linfeng Song</a>, <a href="/search/?searchtype=author&query=Zhang%2C+Y">Yue Zhang</a>, <a href="/search/?searchtype=author&query=Rao%2C+G">Guozheng Rao</a>, <a href="/search/?searchtype=author&query=Yu%2C+K">Kaicheng Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.14352v1-abstract-short" style="display: inline;"> Structured representations, exemplified by Abstract Meaning Representation (AMR), have long been pivotal in computational linguistics. However, their role remains ambiguous in the Large Language Models (LLMs) era. Initial attempts to integrate structured representation into LLMs via a zero-shot setting yielded inferior performance. We hypothesize that such a decline stems from the structure inform… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.14352v1-abstract-full').style.display = 'inline'; document.getElementById('2502.14352v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.14352v1-abstract-full" style="display: none;"> Structured representations, exemplified by Abstract Meaning Representation (AMR), have long been pivotal in computational linguistics. However, their role remains ambiguous in the Large Language Models (LLMs) era. Initial attempts to integrate structured representation into LLMs via a zero-shot setting yielded inferior performance. We hypothesize that such a decline stems from the structure information being passed into LLMs in a code format unfamiliar to LLMs' training corpora. Consequently, we propose SR-LLM, an innovative framework with two settings to explore a superior way of integrating structured representation with LLMs from training-free and training-dependent perspectives. The former integrates structural information through natural language descriptions in LLM prompts, whereas its counterpart augments the model's inference capability through fine-tuning on linguistically described structured representations. Performance improvements were observed in widely downstream datasets, with particularly notable gains of 3.17% and 12.38% in PAWS. To the best of our knowledge, this work represents the pioneering demonstration that leveraging structural representations can substantially enhance LLMs' inference capability. We hope that our work sheds light and encourages future research to enhance the reasoning and interoperability of LLMs by structure data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.14352v1-abstract-full').style.display = 'none'; document.getElementById('2502.14352v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.14296">arXiv:2502.14296</a> <span> [<a href="https://arxiv.org/pdf/2502.14296">pdf</a>, <a href="https://arxiv.org/format/2502.14296">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> On the Trustworthiness of Generative Foundation Models: Guideline, Assessment, and Perspective </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Huang%2C+Y">Yue Huang</a>, <a href="/search/?searchtype=author&query=Gao%2C+C">Chujie Gao</a>, <a href="/search/?searchtype=author&query=Wu%2C+S">Siyuan Wu</a>, <a href="/search/?searchtype=author&query=Wang%2C+H">Haoran Wang</a>, <a href="/search/?searchtype=author&query=Wang%2C+X">Xiangqi Wang</a>, <a href="/search/?searchtype=author&query=Zhou%2C+Y">Yujun Zhou</a>, <a href="/search/?searchtype=author&query=Wang%2C+Y">Yanbo Wang</a>, <a href="/search/?searchtype=author&query=Ye%2C+J">Jiayi Ye</a>, <a href="/search/?searchtype=author&query=Shi%2C+J">Jiawen Shi</a>, <a href="/search/?searchtype=author&query=Zhang%2C+Q">Qihui Zhang</a>, <a href="/search/?searchtype=author&query=Li%2C+Y">Yuan Li</a>, <a href="/search/?searchtype=author&query=Bao%2C+H">Han Bao</a>, <a href="/search/?searchtype=author&query=Liu%2C+Z">Zhaoyi Liu</a>, <a href="/search/?searchtype=author&query=Guan%2C+T">Tianrui Guan</a>, <a href="/search/?searchtype=author&query=Chen%2C+D">Dongping Chen</a>, <a href="/search/?searchtype=author&query=Chen%2C+R">Ruoxi Chen</a>, <a href="/search/?searchtype=author&query=Guo%2C+K">Kehan Guo</a>, <a href="/search/?searchtype=author&query=Zou%2C+A">Andy Zou</a>, <a href="/search/?searchtype=author&query=Kuen-Yew%2C+B+H">Bryan Hooi Kuen-Yew</a>, <a href="/search/?searchtype=author&query=Xiong%2C+C">Caiming Xiong</a>, <a href="/search/?searchtype=author&query=Stengel-Eskin%2C+E">Elias Stengel-Eskin</a>, <a href="/search/?searchtype=author&query=Zhang%2C+H">Hongyang Zhang</a>, <a href="/search/?searchtype=author&query=Yin%2C+H">Hongzhi Yin</a>, <a href="/search/?searchtype=author&query=Zhang%2C+H">Huan Zhang</a>, <a href="/search/?searchtype=author&query=Yao%2C+H">Huaxiu Yao</a> , et al. (41 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.14296v1-abstract-short" style="display: inline;"> Generative Foundation Models (GenFMs) have emerged as transformative tools. However, their widespread adoption raises critical concerns regarding trustworthiness across dimensions. This paper presents a comprehensive framework to address these challenges through three key contributions. First, we systematically review global AI governance laws and policies from governments and regulatory bodies, a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.14296v1-abstract-full').style.display = 'inline'; document.getElementById('2502.14296v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.14296v1-abstract-full" style="display: none;"> Generative Foundation Models (GenFMs) have emerged as transformative tools. However, their widespread adoption raises critical concerns regarding trustworthiness across dimensions. This paper presents a comprehensive framework to address these challenges through three key contributions. First, we systematically review global AI governance laws and policies from governments and regulatory bodies, as well as industry practices and standards. Based on this analysis, we propose a set of guiding principles for GenFMs, developed through extensive multidisciplinary collaboration that integrates technical, ethical, legal, and societal perspectives. Second, we introduce TrustGen, the first dynamic benchmarking platform designed to evaluate trustworthiness across multiple dimensions and model types, including text-to-image, large language, and vision-language models. TrustGen leverages modular components--metadata curation, test case generation, and contextual variation--to enable adaptive and iterative assessments, overcoming the limitations of static evaluation methods. Using TrustGen, we reveal significant progress in trustworthiness while identifying persistent challenges. Finally, we provide an in-depth discussion of the challenges and future directions for trustworthy GenFMs, which reveals the complex, evolving nature of trustworthiness, highlighting the nuanced trade-offs between utility and trustworthiness, and consideration for various downstream applications, identifying persistent challenges and providing a strategic roadmap for future research. This work establishes a holistic framework for advancing trustworthiness in GenAI, paving the way for safer and more responsible integration of GenFMs into critical applications. To facilitate advancement in the community, we release the toolkit for dynamic evaluation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.14296v1-abstract-full').style.display = 'none'; document.getElementById('2502.14296v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.13539">arXiv:2502.13539</a> <span> [<a href="https://arxiv.org/pdf/2502.13539">pdf</a>, <a href="https://arxiv.org/format/2502.13539">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Bursting Filter Bubble: Enhancing Serendipity Recommendations with Aligned Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Xi%2C+Y">Yunjia Xi</a>, <a href="/search/?searchtype=author&query=Weng%2C+M">Muyan Weng</a>, <a href="/search/?searchtype=author&query=Chen%2C+W">Wen Chen</a>, <a href="/search/?searchtype=author&query=Yi%2C+C">Chao Yi</a>, <a href="/search/?searchtype=author&query=Chen%2C+D">Dian Chen</a>, <a href="/search/?searchtype=author&query=Guo%2C+G">Gaoyang Guo</a>, <a href="/search/?searchtype=author&query=Zhang%2C+M">Mao Zhang</a>, <a href="/search/?searchtype=author&query=Wu%2C+J">Jian Wu</a>, <a href="/search/?searchtype=author&query=Jiang%2C+Y">Yuning Jiang</a>, <a href="/search/?searchtype=author&query=Liu%2C+Q">Qingwen Liu</a>, <a href="/search/?searchtype=author&query=Yu%2C+Y">Yong Yu</a>, <a href="/search/?searchtype=author&query=Zhang%2C+W">Weinan Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.13539v1-abstract-short" style="display: inline;"> Recommender systems (RSs) often suffer from the feedback loop phenomenon, e.g., RSs are trained on data biased by their recommendations. This leads to the filter bubble effect that reinforces homogeneous content and reduces user satisfaction. To this end, serendipity recommendations, which offer unexpected yet relevant items, are proposed. Recently, large language models (LLMs) have shown potentia… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13539v1-abstract-full').style.display = 'inline'; document.getElementById('2502.13539v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.13539v1-abstract-full" style="display: none;"> Recommender systems (RSs) often suffer from the feedback loop phenomenon, e.g., RSs are trained on data biased by their recommendations. This leads to the filter bubble effect that reinforces homogeneous content and reduces user satisfaction. To this end, serendipity recommendations, which offer unexpected yet relevant items, are proposed. Recently, large language models (LLMs) have shown potential in serendipity prediction due to their extensive world knowledge and reasoning capabilities. However, they still face challenges in aligning serendipity judgments with human assessments, handling long user behavior sequences, and meeting the latency requirements of industrial RSs. To address these issues, we propose SERAL (Serendipity Recommendations with Aligned Large Language Models), a framework comprising three stages: (1) Cognition Profile Generation to compress user behavior into multi-level profiles; (2) SerenGPT Alignment to align serendipity judgments with human preferences using enriched training data; and (3) Nearline Adaptation to integrate SerenGPT into industrial RSs pipelines efficiently. Online experiments demonstrate that SERAL improves exposure ratio (PVR), clicks, and transactions of serendipitous items by 5.7%, 29.56%, and 27.6%, enhancing user experience without much impact on overall revenue. Now, it has been fully deployed in the "Guess What You Like" of the Taobao App homepage. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13539v1-abstract-full').style.display = 'none'; document.getElementById('2502.13539v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">15 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12625">arXiv:2502.12625</a> <span> [<a href="https://arxiv.org/pdf/2502.12625">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Materials Science">cond-mat.mtrl-sci</span> </div> </div> <p class="title is-5 mathjax"> Reversibly Strain Engineering and Electric-Field Control of Crystal Symmetry in Multiferroic Oxides </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Sun%2C+F">Fei Sun</a>, <a href="/search/?searchtype=author&query=Chen%2C+C">Chao Chen</a>, <a href="/search/?searchtype=author&query=Chen%2C+D">Deyang Chen</a>, <a href="/search/?searchtype=author&query=Qin%2C+M">Minghui Qin</a>, <a href="/search/?searchtype=author&query=Lu%2C+X">Xubing Lu</a>, <a href="/search/?searchtype=author&query=Gao%2C+X">Xingsen Gao</a>, <a href="/search/?searchtype=author&query=Nelson%2C+C+T">Christopher T Nelson</a>, <a href="/search/?searchtype=author&query=Liu%2C+J">Jun-Ming Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.12625v1-abstract-short" style="display: inline;"> Multiferroic oxides, such as BiFeO3, have garnered significant attention due to their coupled ferroelectric, magnetic, and elastic properties, offering exciting opportunities for multifunctional device applications. Controlling phase transitions in these materials is critical for tuning their physical properties and achieving desired functionalities. While numerous studies have focused on ferroele… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12625v1-abstract-full').style.display = 'inline'; document.getElementById('2502.12625v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.12625v1-abstract-full" style="display: none;"> Multiferroic oxides, such as BiFeO3, have garnered significant attention due to their coupled ferroelectric, magnetic, and elastic properties, offering exciting opportunities for multifunctional device applications. Controlling phase transitions in these materials is critical for tuning their physical properties and achieving desired functionalities. While numerous studies have focused on ferroelectric-ferroelectric transitions at rhombohedral-tetragonal morphotropic phase boundaries, far less attention has been given to the ferroelectric-antiferroelectric phase boundaries. Such systems hold promise for discovering novel physical phenomena, such as reversible phase transitions, enhanced piezoelectricity, and magnetoelectric coupling. In this work, we report a reversible antiferroelectric-to-ferroelectric phase transition in La doped BiFeO3 thin films. By modulating the residual strain via film thickness, an antiferroelectric orthorhombic phase is stabilized within a ferroelectric rhombohedral phase matrix. Under an external electric field, the phase transitions reversibly between these two states. This discovery not only enriches the understanding of orthorhombic-rhombohedral morphotropic phase boundaries but also provides a potential pathway for developing magnetoelectric devices with enhanced functionality. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12625v1-abstract-full').style.display = 'none'; document.getElementById('2502.12625v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12527">arXiv:2502.12527</a> <span> [<a href="https://arxiv.org/pdf/2502.12527">pdf</a>, <a href="https://arxiv.org/format/2502.12527">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Comprehensive Assessment and Analysis for NSFW Content Erasure in Text-to-Image Diffusion Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Chen%2C+D">Die Chen</a>, <a href="/search/?searchtype=author&query=Li%2C+Z">Zhiwen Li</a>, <a href="/search/?searchtype=author&query=Chen%2C+C">Cen Chen</a>, <a href="/search/?searchtype=author&query=Li%2C+X">Xiaodan Li</a>, <a href="/search/?searchtype=author&query=Ye%2C+J">Jinyan Ye</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.12527v1-abstract-short" style="display: inline;"> Text-to-image (T2I) diffusion models have gained widespread application across various domains, demonstrating remarkable creative potential. However, the strong generalization capabilities of these models can inadvertently led they to generate NSFW content even with efforts on filtering NSFW content from the training dataset, posing risks to their safe deployment. While several concept erasure met… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12527v1-abstract-full').style.display = 'inline'; document.getElementById('2502.12527v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.12527v1-abstract-full" style="display: none;"> Text-to-image (T2I) diffusion models have gained widespread application across various domains, demonstrating remarkable creative potential. However, the strong generalization capabilities of these models can inadvertently led they to generate NSFW content even with efforts on filtering NSFW content from the training dataset, posing risks to their safe deployment. While several concept erasure methods have been proposed to mitigate this issue, a comprehensive evaluation of their effectiveness remains absent. To bridge this gap, we present the first systematic investigation of concept erasure methods for NSFW content and its sub-themes in text-to-image diffusion models. At the task level, we provide a holistic evaluation of 11 state-of-the-art baseline methods with 14 variants. Specifically, we analyze these methods from six distinct assessment perspectives, including three conventional perspectives, i.e., erasure proportion, image quality, and semantic alignment, and three new perspectives, i.e., excessive erasure, the impact of explicit and implicit unsafe prompts, and robustness. At the tool level, we perform a detailed toxicity analysis of NSFW datasets and compare the performance of different NSFW classifiers, offering deeper insights into their performance alongside a compilation of comprehensive evaluation metrics. Our benchmark not only systematically evaluates concept erasure methods, but also delves into the underlying factors influencing their performance at the insight level. By synthesizing insights from various evaluation perspectives, we provide a deeper understanding of the challenges and opportunities in the field, offering actionable guidance and inspiration for advancing research and practical applications in concept erasure. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12527v1-abstract-full').style.display = 'none'; document.getElementById('2502.12527v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12154">arXiv:2502.12154</a> <span> [<a href="https://arxiv.org/pdf/2502.12154">pdf</a>, <a href="https://arxiv.org/format/2502.12154">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Diffusion Models without Classifier-free Guidance </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Tang%2C+Z">Zhicong Tang</a>, <a href="/search/?searchtype=author&query=Bao%2C+J">Jianmin Bao</a>, <a href="/search/?searchtype=author&query=Chen%2C+D">Dong Chen</a>, <a href="/search/?searchtype=author&query=Guo%2C+B">Baining Guo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.12154v1-abstract-short" style="display: inline;"> This paper presents Model-guidance (MG), a novel objective for training diffusion model that addresses and removes of the commonly used Classifier-free guidance (CFG). Our innovative approach transcends the standard modeling of solely data distribution to incorporating the posterior probability of conditions. The proposed technique originates from the idea of CFG and is easy yet effective, making… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12154v1-abstract-full').style.display = 'inline'; document.getElementById('2502.12154v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.12154v1-abstract-full" style="display: none;"> This paper presents Model-guidance (MG), a novel objective for training diffusion model that addresses and removes of the commonly used Classifier-free guidance (CFG). Our innovative approach transcends the standard modeling of solely data distribution to incorporating the posterior probability of conditions. The proposed technique originates from the idea of CFG and is easy yet effective, making it a plug-and-play module for existing models. Our method significantly accelerates the training process, doubles the inference speed, and achieve exceptional quality that parallel and even surpass concurrent diffusion models with CFG. Extensive experiments demonstrate the effectiveness, efficiency, scalability on different models and datasets. Finally, we establish state-of-the-art performance on ImageNet 256 benchmarks with an FID of 1.34. Our code is available at https://github.com/tzco/Diffusion-wo-CFG. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12154v1-abstract-full').style.display = 'none'; document.getElementById('2502.12154v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12130">arXiv:2502.12130</a> <span> [<a href="https://arxiv.org/pdf/2502.12130">pdf</a>, <a href="https://arxiv.org/format/2502.12130">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Scaling Autonomous Agents via Automatic Reward Modeling And Planning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Chen%2C+Z">Zhenfang Chen</a>, <a href="/search/?searchtype=author&query=Chen%2C+D">Delin Chen</a>, <a href="/search/?searchtype=author&query=Sun%2C+R">Rui Sun</a>, <a href="/search/?searchtype=author&query=Liu%2C+W">Wenjun Liu</a>, <a href="/search/?searchtype=author&query=Gan%2C+C">Chuang Gan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.12130v1-abstract-short" style="display: inline;"> Large language models (LLMs) have demonstrated remarkable capabilities across a range of text-generation tasks. However, LLMs still struggle with problems requiring multi-step decision-making and environmental feedback, such as online shopping, scientific reasoning, and mathematical problem-solving. Unlike pure text data, collecting large-scale decision-making data is challenging. Moreover, many p… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12130v1-abstract-full').style.display = 'inline'; document.getElementById('2502.12130v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.12130v1-abstract-full" style="display: none;"> Large language models (LLMs) have demonstrated remarkable capabilities across a range of text-generation tasks. However, LLMs still struggle with problems requiring multi-step decision-making and environmental feedback, such as online shopping, scientific reasoning, and mathematical problem-solving. Unlike pure text data, collecting large-scale decision-making data is challenging. Moreover, many powerful LLMs are only accessible through APIs, which hinders their fine-tuning for agent tasks due to cost and complexity. To address LLM agents' limitations, we propose a framework that can automatically learn a reward model from the environment without human annotations. This model can be used to evaluate the action trajectories of LLM agents and provide heuristics for task planning. Specifically, our approach involves employing one LLM-based agent to navigate an environment randomly, generating diverse action trajectories. Subsequently, a separate LLM is leveraged to assign a task intent and synthesize a negative response alongside the correct response for each trajectory. These triplets (task intent, positive response, and negative response) are then utilized as training data to optimize a reward model capable of scoring action trajectories. The effectiveness and generalizability of our framework are demonstrated through evaluations conducted on different agent benchmarks. In conclusion, our proposed framework represents a significant advancement in enhancing LLM agents' decision-making capabilities. By automating the learning of reward models, we overcome the challenges of data scarcity and API limitations, potentially revolutionizing the application of LLMs in complex and interactive environments. This research paves the way for more sophisticated AI agents capable of tackling a wide range of real-world problems requiring multi-step decision-making. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12130v1-abstract-full').style.display = 'none'; document.getElementById('2502.12130v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICLR2025, Project page: https://armap-agent.github.io</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.10740">arXiv:2502.10740</a> <span> [<a href="https://arxiv.org/pdf/2502.10740">pdf</a>, <a href="https://arxiv.org/format/2502.10740">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Instrumentation and Detectors">physics.ins-det</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Nuclear Experiment">nucl-ex</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Accelerator Physics">physics.acc-ph</span> </div> </div> <p class="title is-5 mathjax"> Commissioning of a radiofrequency quadrupole cooler-buncher for collinear laser spectroscopy </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Liu%2C+Y">Yin-Shen Liu</a>, <a href="/search/?searchtype=author&query=Hu%2C+H">Han-Rui Hu</a>, <a href="/search/?searchtype=author&query=Yang%2C+X">Xiao-Fei Yang</a>, <a href="/search/?searchtype=author&query=Mei%2C+W">Wen-Cong Mei</a>, <a href="/search/?searchtype=author&query=Guo%2C+Y">Yang-Fan Guo</a>, <a href="/search/?searchtype=author&query=Yan%2C+Z">Zhou Yan</a>, <a href="/search/?searchtype=author&query=Chen%2C+S">Shao-Jie Chen</a>, <a href="/search/?searchtype=author&query=Bai%2C+S">Shi-wei Bai</a>, <a href="/search/?searchtype=author&query=Wang%2C+S">Shu-Jing Wang</a>, <a href="/search/?searchtype=author&query=Liu%2C+Y">Yong-Chao Liu</a>, <a href="/search/?searchtype=author&query=Zhang%2C+P">Peng Zhang</a>, <a href="/search/?searchtype=author&query=Chen%2C+D">Dong-Yang Chen</a>, <a href="/search/?searchtype=author&query=Ye%2C+Y">Yan-Lin Ye</a>, <a href="/search/?searchtype=author&query=Li%2C+Q">Qi-Te Li</a>, <a href="/search/?searchtype=author&query=Yang%2C+J">Jie Yang</a>, <a href="/search/?searchtype=author&query=Malbrunot-Ettenauer%2C+S">Stephan Malbrunot-Ettenauer</a>, <a href="/search/?searchtype=author&query=Lechner%2C+S">Simon Lechner</a>, <a href="/search/?searchtype=author&query=Kanitz%2C+C">Carina Kanitz</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.10740v1-abstract-short" style="display: inline;"> A RadioFrequency Quadrupole (RFQ) cooler-buncher system has been developed and implemented in a collinear laser spectroscopy setup. This system is dedicated to convert a continuous ion beam into short bunches, while enhancing beam quality and reducing energy spread. The functionality of the RFQ cooler-buncher has been verified through offline tests with stable rubidium and indium beam, delivered f… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10740v1-abstract-full').style.display = 'inline'; document.getElementById('2502.10740v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.10740v1-abstract-full" style="display: none;"> A RadioFrequency Quadrupole (RFQ) cooler-buncher system has been developed and implemented in a collinear laser spectroscopy setup. This system is dedicated to convert a continuous ion beam into short bunches, while enhancing beam quality and reducing energy spread. The functionality of the RFQ cooler-buncher has been verified through offline tests with stable rubidium and indium beam, delivered from a surface ion source and a laser ablation ion source, respectively. With a transmission efficiency exceeding 60%, bunched ion beams with a full width at half maximum of approximately 2渭s in the time-of-flight spectrum have been successfully achieved. The implementation of RFQ cooler-buncher system has significantly improved the overall transmission efficiency of the collinear laser spectroscopy setup. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10740v1-abstract-full').style.display = 'none'; document.getElementById('2502.10740v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.10341">arXiv:2502.10341</a> <span> [<a href="https://arxiv.org/pdf/2502.10341">pdf</a>, <a href="https://arxiv.org/format/2502.10341">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Organize the Web: Constructing Domains Enhances Pre-Training Data Curation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Wettig%2C+A">Alexander Wettig</a>, <a href="/search/?searchtype=author&query=Lo%2C+K">Kyle Lo</a>, <a href="/search/?searchtype=author&query=Min%2C+S">Sewon Min</a>, <a href="/search/?searchtype=author&query=Hajishirzi%2C+H">Hannaneh Hajishirzi</a>, <a href="/search/?searchtype=author&query=Chen%2C+D">Danqi Chen</a>, <a href="/search/?searchtype=author&query=Soldaini%2C+L">Luca Soldaini</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.10341v1-abstract-short" style="display: inline;"> Modern language models are trained on large, unstructured datasets consisting of trillions of tokens and obtained by crawling the web. The unstructured nature makes it difficult to reason about their contents and develop systematic approaches to data curation. In this paper, we unpack monolithic web corpora by developing taxonomies of their contents and organizing them into domains. We introduce W… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10341v1-abstract-full').style.display = 'inline'; document.getElementById('2502.10341v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.10341v1-abstract-full" style="display: none;"> Modern language models are trained on large, unstructured datasets consisting of trillions of tokens and obtained by crawling the web. The unstructured nature makes it difficult to reason about their contents and develop systematic approaches to data curation. In this paper, we unpack monolithic web corpora by developing taxonomies of their contents and organizing them into domains. We introduce WebOrganizer, a framework for organizing web pages in terms of both their topic and format. Using these two complementary notions of domains, we automatically annotate pre-training data by distilling annotations from a large language model into efficient classifiers. This allows us to study how data from different domains should be mixed to improve models on downstream tasks, and we show that we can combine insights about effective topics and formats to further boost performance. We demonstrate that our domain mixing also improves existing methods that select data based on quality. Furthermore, we study and compare how quality-based methods will implicitly change the domain mixture. Overall, our work demonstrates that constructing and mixing domains provides a valuable complement to quality-based data curation methods, opening new avenues for effective and insightful pre-training data curation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10341v1-abstract-full').style.display = 'none'; document.getElementById('2502.10341v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Project page: https://weborganizer.allen.ai</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.07640">arXiv:2502.07640</a> <span> [<a href="https://arxiv.org/pdf/2502.07640">pdf</a>, <a href="https://arxiv.org/format/2502.07640">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Goedel-Prover: A Frontier Model for Open-Source Automated Theorem Proving </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Lin%2C+Y">Yong Lin</a>, <a href="/search/?searchtype=author&query=Tang%2C+S">Shange Tang</a>, <a href="/search/?searchtype=author&query=Lyu%2C+B">Bohan Lyu</a>, <a href="/search/?searchtype=author&query=Wu%2C+J">Jiayun Wu</a>, <a href="/search/?searchtype=author&query=Lin%2C+H">Hongzhou Lin</a>, <a href="/search/?searchtype=author&query=Yang%2C+K">Kaiyu Yang</a>, <a href="/search/?searchtype=author&query=Li%2C+J">Jia Li</a>, <a href="/search/?searchtype=author&query=Xia%2C+M">Mengzhou Xia</a>, <a href="/search/?searchtype=author&query=Chen%2C+D">Danqi Chen</a>, <a href="/search/?searchtype=author&query=Arora%2C+S">Sanjeev Arora</a>, <a href="/search/?searchtype=author&query=Jin%2C+C">Chi Jin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.07640v2-abstract-short" style="display: inline;"> We introduce Goedel-Prover, an open-source large language model (LLM) that achieves the state-of-the-art (SOTA) performance in automated formal proof generation for mathematical problems. The key challenge in this field is the scarcity of formalized math statements and proofs, which we tackle in the following ways. We train statement formalizers to translate the natural language math problems from… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07640v2-abstract-full').style.display = 'inline'; document.getElementById('2502.07640v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.07640v2-abstract-full" style="display: none;"> We introduce Goedel-Prover, an open-source large language model (LLM) that achieves the state-of-the-art (SOTA) performance in automated formal proof generation for mathematical problems. The key challenge in this field is the scarcity of formalized math statements and proofs, which we tackle in the following ways. We train statement formalizers to translate the natural language math problems from Numina into formal language (Lean 4), creating a dataset of 1.64 million formal statements. LLMs are used to check that the formal statements accurately preserve the content of the original natural language problems. We then iteratively build a large dataset of formal proofs by training a series of provers. Each prover succeeds in proving many statements that the previous ones could not, and these new proofs are added to the training set for the next prover. Despite using only supervised fine-tuning, our final prover significantly outperforms the previous best open-source model, DeepSeek-Prover-V1.5, which employs reinforcement learning. On the miniF2F benchmark, our model achieves a success rate of 57.6% (Pass@32), surpassing DeepSeek-Prover-V1.5 by 7.6%. On PutnamBench, Goedel-Prover successfully solves 7 problems (Pass@512), ranking first on the leaderboard. Furthermore, it generates 29.7K formal proofs for Lean Workbook problems, nearly doubling the 15.7K produced by earlier works. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07640v2-abstract-full').style.display = 'none'; document.getElementById('2502.07640v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.07351">arXiv:2502.07351</a> <span> [<a href="https://arxiv.org/pdf/2502.07351">pdf</a>, <a href="https://arxiv.org/format/2502.07351">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Multi-Task-oriented Nighttime Haze Imaging Enhancer for Vision-driven Measurement Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Chen%2C+A">Ai Chen</a>, <a href="/search/?searchtype=author&query=Lu%2C+Y">Yuxu Lu</a>, <a href="/search/?searchtype=author&query=Yang%2C+D">Dong Yang</a>, <a href="/search/?searchtype=author&query=Zhou%2C+J">Junlin Zhou</a>, <a href="/search/?searchtype=author&query=Fu%2C+Y">Yan Fu</a>, <a href="/search/?searchtype=author&query=Chen%2C+D">Duanbing Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.07351v1-abstract-short" style="display: inline;"> Salient object detection (SOD) plays a critical role in vision-driven measurement systems (VMS), facilitating the detection and segmentation of key visual elements in an image. However, adverse imaging conditions such as haze during the day, low light, and haze at night severely degrade image quality, and complicating the SOD process. To address these challenges, we propose a multi-task-oriented n… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07351v1-abstract-full').style.display = 'inline'; document.getElementById('2502.07351v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.07351v1-abstract-full" style="display: none;"> Salient object detection (SOD) plays a critical role in vision-driven measurement systems (VMS), facilitating the detection and segmentation of key visual elements in an image. However, adverse imaging conditions such as haze during the day, low light, and haze at night severely degrade image quality, and complicating the SOD process. To address these challenges, we propose a multi-task-oriented nighttime haze imaging enhancer (MToIE), which integrates three tasks: daytime dehazing, low-light enhancement, and nighttime dehazing. The MToIE incorporates two key innovative components: First, the network employs a task-oriented node learning mechanism to handle three specific degradation types: day-time haze, low light, and night-time haze conditions, with an embedded self-attention module enhancing its performance in nighttime imaging. In addition, multi-receptive field enhancement module that efficiently extracts multi-scale features through three parallel depthwise separable convolution branches with different dilation rates, capturing comprehensive spatial information with minimal computational overhead. To ensure optimal image reconstruction quality and visual characteristics, we suggest a hybrid loss function. Extensive experiments on different types of weather/imaging conditions illustrate that MToIE surpasses existing methods, significantly enhancing the accuracy and reliability of vision systems across diverse imaging scenarios. The code is available at https://github.com/Ai-Chen-Lab/MToIE. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07351v1-abstract-full').style.display = 'none'; document.getElementById('2502.07351v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.07179">arXiv:2502.07179</a> <span> [<a href="https://arxiv.org/pdf/2502.07179">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.3934/era.2024131">10.3934/era.2024131 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Improved YOLOv7 model for insulator defect detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Wang%2C+Z">Zhenyue Wang</a>, <a href="/search/?searchtype=author&query=Yuan%2C+G">Guowu Yuan</a>, <a href="/search/?searchtype=author&query=Zhou%2C+H">Hao Zhou</a>, <a href="/search/?searchtype=author&query=Ma%2C+Y">Yi Ma</a>, <a href="/search/?searchtype=author&query=Ma%2C+Y">Yutang Ma</a>, <a href="/search/?searchtype=author&query=Chen%2C+D">Dong Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.07179v1-abstract-short" style="display: inline;"> Insulators are crucial insulation components and structural supports in power grids, playing a vital role in the transmission lines. Due to temperature fluctuations, internal stress, or damage from hail, insulators are prone to injury. Automatic detection of damaged insulators faces challenges such as diverse types, small defect targets, and complex backgrounds and shapes. Most research for detect… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07179v1-abstract-full').style.display = 'inline'; document.getElementById('2502.07179v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.07179v1-abstract-full" style="display: none;"> Insulators are crucial insulation components and structural supports in power grids, playing a vital role in the transmission lines. Due to temperature fluctuations, internal stress, or damage from hail, insulators are prone to injury. Automatic detection of damaged insulators faces challenges such as diverse types, small defect targets, and complex backgrounds and shapes. Most research for detecting insulator defects has focused on a single defect type or a specific material. However, the insulators in the grid's transmission lines have different colors and materials. Various insulator defects coexist, and the existing methods have difficulty meeting the practical application requirements. Current methods suffer from low detection accuracy and mAP0.5 cannot meet application requirements. This paper proposes an improved YOLOv7 model for multi-type insulator defect detection. First, our model replaces the SPPCSPC module with the RFB module to enhance the network's feature extraction capability. Second, a CA mechanism is introduced into the head part to enhance the network's feature representation ability and to improve detection accuracy. Third, a WIoU loss function is employed to address the low-quality samples hindering model generalization during training, thereby improving the model's overall performance. The experimental results indicate that the proposed model exhibits enhancements across various performance metrics. Specifically, there is a 1.6% advancement in mAP_0.5, a corresponding 1.6% enhancement in mAP_0.5:0.95, a 1.3% elevation in precision, and a 1% increase in recall. Moreover, the model achieves parameter reduction by 3.2 million, leading to a decrease of 2.5 GFLOPS in computational cost. Notably, there is also an improvement of 2.81 milliseconds in single-image detection speed. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07179v1-abstract-full').style.display = 'none'; document.getElementById('2502.07179v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">19 pages, 13 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.06280">arXiv:2502.06280</a> <span> [<a href="https://arxiv.org/pdf/2502.06280">pdf</a>, <a href="https://arxiv.org/format/2502.06280">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> IceBerg: Debiased Self-Training for Class-Imbalanced Node Classification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Li%2C+Z">Zhixun Li</a>, <a href="/search/?searchtype=author&query=Chen%2C+D">Dingshuo Chen</a>, <a href="/search/?searchtype=author&query=Zhao%2C+T">Tong Zhao</a>, <a href="/search/?searchtype=author&query=Wang%2C+D">Daixin Wang</a>, <a href="/search/?searchtype=author&query=Liu%2C+H">Hongrui Liu</a>, <a href="/search/?searchtype=author&query=Zhang%2C+Z">Zhiqiang Zhang</a>, <a href="/search/?searchtype=author&query=Zhou%2C+J">Jun Zhou</a>, <a href="/search/?searchtype=author&query=Yu%2C+J+X">Jeffrey Xu Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.06280v1-abstract-short" style="display: inline;"> Graph Neural Networks (GNNs) have achieved great success in dealing with non-Euclidean graph-structured data and have been widely deployed in many real-world applications. However, their effectiveness is often jeopardized under class-imbalanced training sets. Most existing studies have analyzed class-imbalanced node classification from a supervised learning perspective, but they do not fully utili… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06280v1-abstract-full').style.display = 'inline'; document.getElementById('2502.06280v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.06280v1-abstract-full" style="display: none;"> Graph Neural Networks (GNNs) have achieved great success in dealing with non-Euclidean graph-structured data and have been widely deployed in many real-world applications. However, their effectiveness is often jeopardized under class-imbalanced training sets. Most existing studies have analyzed class-imbalanced node classification from a supervised learning perspective, but they do not fully utilize the large number of unlabeled nodes in semi-supervised scenarios. We claim that the supervised signal is just the tip of the iceberg and a large number of unlabeled nodes have not yet been effectively utilized. In this work, we propose IceBerg, a debiased self-training framework to address the class-imbalanced and few-shot challenges for GNNs at the same time. Specifically, to figure out the Matthew effect and label distribution shift in self-training, we propose Double Balancing, which can largely improve the performance of existing baselines with just a few lines of code as a simple plug-and-play module. Secondly, to enhance the long-range propagation capability of GNNs, we disentangle the propagation and transformation operations of GNNs. Therefore, the weak supervision signals can propagate more effectively to address the few-shot issue. In summary, we find that leveraging unlabeled nodes can significantly enhance the performance of GNNs in class-imbalanced and few-shot scenarios, and even small, surgical modifications can lead to substantial performance improvements. Systematic experiments on benchmark datasets show that our method can deliver considerable performance gain over existing class-imbalanced node classification baselines. Additionally, due to IceBerg's outstanding ability to leverage unsupervised signals, it also achieves state-of-the-art results in few-shot node classification scenarios. The code of IceBerg is available at: https://github.com/ZhixunLEE/IceBerg. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06280v1-abstract-full').style.display = 'none'; document.getElementById('2502.06280v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by TheWebConf (WWW) 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05523">arXiv:2502.05523</a> <span> [<a href="https://arxiv.org/pdf/2502.05523">pdf</a>, <a href="https://arxiv.org/format/2502.05523">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Adaptive Domain Scaling for Personalized Sequential Modeling in Recommenders </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Chai%2C+Z">Zheng Chai</a>, <a href="/search/?searchtype=author&query=Lu%2C+H">Hui Lu</a>, <a href="/search/?searchtype=author&query=Chen%2C+D">Di Chen</a>, <a href="/search/?searchtype=author&query=Ren%2C+Q">Qin Ren</a>, <a href="/search/?searchtype=author&query=Zheng%2C+Y">Yuchao Zheng</a>, <a href="/search/?searchtype=author&query=Zhou%2C+X">Xun Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05523v2-abstract-short" style="display: inline;"> Users generally exhibit complex behavioral patterns and diverse intentions in multiple business scenarios of super applications like Douyin, presenting great challenges to current industrial multi-domain recommenders. To mitigate the discrepancies across diverse domains, researches and industrial practices generally emphasize sophisticated network structures to accomodate diverse data distribution… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05523v2-abstract-full').style.display = 'inline'; document.getElementById('2502.05523v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05523v2-abstract-full" style="display: none;"> Users generally exhibit complex behavioral patterns and diverse intentions in multiple business scenarios of super applications like Douyin, presenting great challenges to current industrial multi-domain recommenders. To mitigate the discrepancies across diverse domains, researches and industrial practices generally emphasize sophisticated network structures to accomodate diverse data distributions, while neglecting the inherent understanding of user behavioral sequence from the multi-domain perspective. In this paper, we present Adaptive Domain Scaling (ADS) model, which comprehensively enhances the personalization capability in target-aware sequence modeling across multiple domains. Specifically, ADS comprises of two major modules, including personalized sequence representation generation (PSRG) and personalized candidate representation generation (PCRG). The modules contribute to the tailored multi-domain learning by dynamically learning both the user behavioral sequence item representation and the candidate target item representation under different domains, facilitating adaptive user intention understanding. Experiments are performed on both a public dataset and two billion-scaled industrial datasets, and the extensive results verify the high effectiveness and compatibility of ADS. Besides, we conduct online experiments on two influential business scenarios including Douyin Advertisement Platform and Douyin E-commerce Service Platform, both of which show substantial business improvements. Currently, ADS has been fully deployed in many recommendation services at ByteDance, serving billions of users. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05523v2-abstract-full').style.display = 'none'; document.getElementById('2502.05523v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05313">arXiv:2502.05313</a> <span> [<a href="https://arxiv.org/pdf/2502.05313">pdf</a>, <a href="https://arxiv.org/format/2502.05313">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Strongly Correlated Electrons">cond-mat.str-el</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Soft Condensed Matter">cond-mat.soft</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1073/pnas.2416111122">10.1073/pnas.2416111122 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Anomalous suppression of large-scale density fluctuations in classical and quantum spin liquids </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Chen%2C+D">Duyu Chen</a>, <a href="/search/?searchtype=author&query=Samajdar%2C+R">Rhine Samajdar</a>, <a href="/search/?searchtype=author&query=Jiao%2C+Y">Yang Jiao</a>, <a href="/search/?searchtype=author&query=Torquato%2C+S">Salvatore Torquato</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05313v1-abstract-short" style="display: inline;"> Classical spin liquids (CSLs) are intriguing states of matter that do not exhibit long-range magnetic order and are characterized by an extensive ground-state degeneracy. Adding quantum fluctuations, which induce dynamics between these different classical ground states, can give rise to quantum spin liquids (QSLs). QSLs are highly entangled quantum phases of matter characterized by fascinating eme… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05313v1-abstract-full').style.display = 'inline'; document.getElementById('2502.05313v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05313v1-abstract-full" style="display: none;"> Classical spin liquids (CSLs) are intriguing states of matter that do not exhibit long-range magnetic order and are characterized by an extensive ground-state degeneracy. Adding quantum fluctuations, which induce dynamics between these different classical ground states, can give rise to quantum spin liquids (QSLs). QSLs are highly entangled quantum phases of matter characterized by fascinating emergent properties, such as fractionalized excitations and topological order. One such exotic quantum liquid is the $\mathbb{Z}_2$ QSL, which can be regarded as a resonating valence bond (RVB) state formed from superpositions of dimer coverings of an underlying lattice. In this work, we unveil a \textit{hidden} large-scale structural property of archetypal CSLs and QSLs known as hyperuniformity, i.e., normalized infinite-wavelength density fluctuations are completely suppressed in these systems. In particular, we first demonstrate that classical ensembles of close-packed dimers and their corresponding quantum RVB states are perfectly hyperuniform in general. Subsequently, we focus on a ruby-lattice spin liquid that was recently realized in a Rydberg-atom quantum simulator, and show that the QSL remains effectively hyperuniform even in the presence of a finite density of spinon and vison excitations, as long as the dimer constraint is still largely preserved. Moreover, we demonstrate that metrics based on the framework of hyperuniformity can be used to distinguish the QSL from other proximate quantum phases. These metrics can help identify potential QSL candidates, which can then be further analyzed using more advanced, computationally-intensive quantum numerics to confirm their status as true QSLs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05313v1-abstract-full').style.display = 'none'; document.getElementById('2502.05313v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Proc. Natl. Acad. Sci. U.S.A., 122(6), e2416111122 (2025) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05036">arXiv:2502.05036</a> <span> [<a href="https://arxiv.org/pdf/2502.05036">pdf</a>, <a href="https://arxiv.org/format/2502.05036">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> nvAgent: Automated Data Visualization from Natural Language via Collaborative Agent Workflow </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Ouyang%2C+G">Geliang Ouyang</a>, <a href="/search/?searchtype=author&query=Chen%2C+J">Jingyao Chen</a>, <a href="/search/?searchtype=author&query=Nie%2C+Z">Zhihe Nie</a>, <a href="/search/?searchtype=author&query=Gui%2C+Y">Yi Gui</a>, <a href="/search/?searchtype=author&query=Wan%2C+Y">Yao Wan</a>, <a href="/search/?searchtype=author&query=Zhang%2C+H">Hongyu Zhang</a>, <a href="/search/?searchtype=author&query=Chen%2C+D">Dongping Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05036v1-abstract-short" style="display: inline;"> Natural Language to Visualization (NL2Vis) seeks to convert natural-language descriptions into visual representations of given tables, empowering users to derive insights from large-scale data. Recent advancements in Large Language Models (LLMs) show promise in automating code generation to transform tabular data into accessible visualizations. However, they often struggle with complex queries tha… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05036v1-abstract-full').style.display = 'inline'; document.getElementById('2502.05036v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05036v1-abstract-full" style="display: none;"> Natural Language to Visualization (NL2Vis) seeks to convert natural-language descriptions into visual representations of given tables, empowering users to derive insights from large-scale data. Recent advancements in Large Language Models (LLMs) show promise in automating code generation to transform tabular data into accessible visualizations. However, they often struggle with complex queries that require reasoning across multiple tables. To address this limitation, we propose a collaborative agent workflow, termed nvAgent, for NL2Vis. Specifically, nvAgent comprises three agents: a processor agent for database processing and context filtering, a composer agent for planning visualization generation, and a validator agent for code translation and output verification. Comprehensive evaluations on the new VisEval benchmark demonstrate that nvAgent consistently surpasses state-of-the-art baselines, achieving a 7.88% improvement in single-table and a 9.23% improvement in multi-table scenarios. Qualitative analyses further highlight that nvAgent maintains nearly a 20% performance margin over previous models, underscoring its capacity to produce high-quality visual representations from complex, heterogeneous data sources. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05036v1-abstract-full').style.display = 'none'; document.getElementById('2502.05036v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.04380">arXiv:2502.04380</a> <span> [<a href="https://arxiv.org/pdf/2502.04380">pdf</a>, <a href="https://arxiv.org/format/2502.04380">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Diversity as a Reward: Fine-Tuning LLMs on a Mixture of Domain-Undetermined Data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Ling%2C+Z">Zhenqing Ling</a>, <a href="/search/?searchtype=author&query=Chen%2C+D">Daoyuan Chen</a>, <a href="/search/?searchtype=author&query=Yao%2C+L">Liuyi Yao</a>, <a href="/search/?searchtype=author&query=Li%2C+Y">Yaliang Li</a>, <a href="/search/?searchtype=author&query=Shen%2C+Y">Ying Shen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.04380v1-abstract-short" style="display: inline;"> Fine-tuning large language models (LLMs) using diverse datasets is crucial for enhancing their overall performance across various domains. In practical scenarios, existing methods based on modeling the mixture proportions of data composition often struggle with data whose domain labels are missing, imprecise or non-normalized, while methods based on data selection usually encounter difficulties in… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04380v1-abstract-full').style.display = 'inline'; document.getElementById('2502.04380v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.04380v1-abstract-full" style="display: none;"> Fine-tuning large language models (LLMs) using diverse datasets is crucial for enhancing their overall performance across various domains. In practical scenarios, existing methods based on modeling the mixture proportions of data composition often struggle with data whose domain labels are missing, imprecise or non-normalized, while methods based on data selection usually encounter difficulties in balancing multi-domain performance. To address these challenges, in this paper, we study the role of data diversity in enhancing the overall abilities of LLMs by empirically constructing contrastive data pools and theoretically deriving explanations for both inter- and intra-diversity. Building upon the insights gained, we propose a new method that gives the LLM a dual identity: an output model to cognitively probe and select data based on diversity reward, as well as an input model to be tuned with the selected data. Extensive experiments show that the proposed method notably boosts performance across domain-undetermined data and a series of foundational downstream tasks when applied to various advanced LLMs. We release our code and hope this study can shed light on the understanding of data diversity and advance feedback-driven data-model co-development for LLMs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04380v1-abstract-full').style.display = 'none'; document.getElementById('2502.04380v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">26 pages, 15 figures, 11 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.03876">arXiv:2502.03876</a> <span> [<a href="https://arxiv.org/pdf/2502.03876">pdf</a>, <a href="https://arxiv.org/ps/2502.03876">ps</a>, <a href="https://arxiv.org/format/2502.03876">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Position: Untrained Machine Learning for Anomaly Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Du%2C+J">Juan Du</a>, <a href="/search/?searchtype=author&query=Chen%2C+D">Dongheng Chen</a>, <a href="/search/?searchtype=author&query=Yan%2C+H">Hao Yan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.03876v1-abstract-short" style="display: inline;"> Anomaly detection based on 3D point cloud data is an important research problem and receives more and more attention recently. Untrained anomaly detection based on only one sample is an emerging research problem motivated by real manufacturing industries such as personalized manufacturing that only one sample can be collected without any additional labels. How to accurately identify anomalies base… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03876v1-abstract-full').style.display = 'inline'; document.getElementById('2502.03876v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.03876v1-abstract-full" style="display: none;"> Anomaly detection based on 3D point cloud data is an important research problem and receives more and more attention recently. Untrained anomaly detection based on only one sample is an emerging research problem motivated by real manufacturing industries such as personalized manufacturing that only one sample can be collected without any additional labels. How to accurately identify anomalies based on one 3D point cloud sample is a critical challenge in both industrial applications and the field of machine learning. This paper aims to provide a formal definition of untrained anomaly detection problem based on 3D point cloud data, discuss the differences between untrained anomaly detection and current unsupervised anomaly detection methods. Unlike unsupervised learning, untrained methods do not rely on any data, including unlabeled data. Instead, they leverage prior knowledge about the manufacturing surfaces and anomalies. Examples are used to illustrate these prior knowledge and untrained machine learning model. Afterwards, literature review on untrained anomaly detection based on 3D point cloud data is also provided, and the potential of untrained deep neural networks for anomaly detection is also discussed as outlooks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03876v1-abstract-full').style.display = 'none'; document.getElementById('2502.03876v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages,0 figure</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.03726">arXiv:2502.03726</a> <span> [<a href="https://arxiv.org/pdf/2502.03726">pdf</a>, <a href="https://arxiv.org/format/2502.03726">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> DICE: Distilling Classifier-Free Guidance into Text Embeddings </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Zhou%2C+Z">Zhenyu Zhou</a>, <a href="/search/?searchtype=author&query=Chen%2C+D">Defang Chen</a>, <a href="/search/?searchtype=author&query=Wang%2C+C">Can Wang</a>, <a href="/search/?searchtype=author&query=Chen%2C+C">Chun Chen</a>, <a href="/search/?searchtype=author&query=Lyu%2C+S">Siwei Lyu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.03726v1-abstract-short" style="display: inline;"> Text-to-image diffusion models are capable of generating high-quality images, but these images often fail to align closely with the given text prompts. Classifier-free guidance (CFG) is a popular and effective technique for improving text-image alignment in the generative process. However, using CFG introduces significant computational overhead and deviates from the established theoretical foundat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03726v1-abstract-full').style.display = 'inline'; document.getElementById('2502.03726v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.03726v1-abstract-full" style="display: none;"> Text-to-image diffusion models are capable of generating high-quality images, but these images often fail to align closely with the given text prompts. Classifier-free guidance (CFG) is a popular and effective technique for improving text-image alignment in the generative process. However, using CFG introduces significant computational overhead and deviates from the established theoretical foundations of diffusion models. In this paper, we present DIstilling CFG by enhancing text Embeddings (DICE), a novel approach that removes the reliance on CFG in the generative process while maintaining the benefits it provides. DICE distills a CFG-based text-to-image diffusion model into a CFG-free version by refining text embeddings to replicate CFG-based directions. In this way, we avoid the computational and theoretical drawbacks of CFG, enabling high-quality, well-aligned image generation at a fast sampling speed. Extensive experiments on multiple Stable Diffusion v1.5 variants, SDXL and PixArt-$伪$ demonstrate the effectiveness of our method. Furthermore, DICE supports negative prompts for image editing to improve image quality further. Code will be available soon. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03726v1-abstract-full').style.display = 'none'; document.getElementById('2502.03726v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.03629">arXiv:2502.03629</a> <span> [<a href="https://arxiv.org/pdf/2502.03629">pdf</a>, <a href="https://arxiv.org/format/2502.03629">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> REALEDIT: Reddit Edits As a Large-scale Empirical Dataset for Image Transformations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Sushko%2C+P">Peter Sushko</a>, <a href="/search/?searchtype=author&query=Bharadwaj%2C+A">Ayana Bharadwaj</a>, <a href="/search/?searchtype=author&query=Lim%2C+Z+Y">Zhi Yang Lim</a>, <a href="/search/?searchtype=author&query=Ilin%2C+V">Vasily Ilin</a>, <a href="/search/?searchtype=author&query=Caffee%2C+B">Ben Caffee</a>, <a href="/search/?searchtype=author&query=Chen%2C+D">Dongping Chen</a>, <a href="/search/?searchtype=author&query=Salehi%2C+M">Mohammadreza Salehi</a>, <a href="/search/?searchtype=author&query=Hsieh%2C+C">Cheng-Yu Hsieh</a>, <a href="/search/?searchtype=author&query=Krishna%2C+R">Ranjay Krishna</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.03629v1-abstract-short" style="display: inline;"> Existing image editing models struggle to meet real-world demands. Despite excelling in academic benchmarks, they have yet to be widely adopted for real user needs. Datasets that power these models use artificial edits, lacking the scale and ecological validity necessary to address the true diversity of user requests. We introduce REALEDIT, a large-scale image editing dataset with authentic user r… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03629v1-abstract-full').style.display = 'inline'; document.getElementById('2502.03629v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.03629v1-abstract-full" style="display: none;"> Existing image editing models struggle to meet real-world demands. Despite excelling in academic benchmarks, they have yet to be widely adopted for real user needs. Datasets that power these models use artificial edits, lacking the scale and ecological validity necessary to address the true diversity of user requests. We introduce REALEDIT, a large-scale image editing dataset with authentic user requests and human-made edits sourced from Reddit. REALEDIT includes a test set of 9300 examples to evaluate models on real user requests. Our results show that existing models fall short on these tasks, highlighting the need for realistic training data. To address this, we introduce 48K training examples and train our REALEDIT model, achieving substantial gains - outperforming competitors by up to 165 Elo points in human judgment and 92 percent relative improvement on the automated VIEScore metric. We deploy our model on Reddit, testing it on new requests, and receive positive feedback. Beyond image editing, we explore REALEDIT's potential in detecting edited images by partnering with a deepfake detection non-profit. Finetuning their model on REALEDIT data improves its F1-score by 14 percentage points, underscoring the dataset's value for broad applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03629v1-abstract-full').style.display = 'none'; document.getElementById('2502.03629v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.03606">arXiv:2502.03606</a> <span> [<a href="https://arxiv.org/pdf/2502.03606">pdf</a>, <a href="https://arxiv.org/format/2502.03606">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> </div> </div> <p class="title is-5 mathjax"> Artificial Intelligence Approaches for Anti-Addiction Drug Discovery </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Chen%2C+D">Dong Chen</a>, <a href="/search/?searchtype=author&query=Jiang%2C+J">Jian Jiang</a>, <a href="/search/?searchtype=author&query=Su%2C+Z">Zhe Su</a>, <a href="/search/?searchtype=author&query=Wei%2C+G">Guo-Wei Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.03606v2-abstract-short" style="display: inline;"> Drug addiction is a complex and pervasive global challenge that continues to pose significant public health concerns. Traditional approaches to anti-addiction drug discovery have struggled to deliver effective therapeutics, facing high attrition rates, long development timelines, and inefficiencies in processing large-scale data. Artificial intelligence (AI) has emerged as a transformative solutio… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03606v2-abstract-full').style.display = 'inline'; document.getElementById('2502.03606v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.03606v2-abstract-full" style="display: none;"> Drug addiction is a complex and pervasive global challenge that continues to pose significant public health concerns. Traditional approaches to anti-addiction drug discovery have struggled to deliver effective therapeutics, facing high attrition rates, long development timelines, and inefficiencies in processing large-scale data. Artificial intelligence (AI) has emerged as a transformative solution to address these issues. Using advanced algorithms, AI is revolutionizing drug discovery by enhancing the speed and precision of key processes. This review explores the transformative role of AI in the pipeline for anti-addiction drug discovery, including data collection, target identification, and compound optimization. By highlighting the potential of AI to overcome traditional barriers, this review systematically examines how AI addresses critical gaps in anti-addiction research, emphasizing its potential to revolutionize drug discovery and development, overcome challenges, and advance more effective therapeutic strategies. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03606v2-abstract-full').style.display = 'none'; document.getElementById('2502.03606v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 5 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.03277">arXiv:2502.03277</a> <span> [<a href="https://arxiv.org/pdf/2502.03277">pdf</a>, <a href="https://arxiv.org/format/2502.03277">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="High Energy Physics - Phenomenology">hep-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="High Energy Physics - Experiment">hep-ex</span> </div> </div> <p class="title is-5 mathjax"> $螞_{c}(2910)$ and $螞_{c}(2940)$ productions in $蟺^{-} p$ scattering process </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Guo%2C+Q">Quan-Yun Guo</a>, <a href="/search/?searchtype=author&query=Chen%2C+D">Dian-Yong Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.03277v1-abstract-short" style="display: inline;"> In the present work, we propose to investigate the productions of $螞_{c}(2910)$ and $螞_{c}(2940)$ in the $蟺^{-} p \rightarrow D^{-} D^{0} p$ process. The cross sections and differential cross sections depending on the $D^0 p$ invariant mass spectrum are estimated by utilizing an effective Lagrangian approach, where both $螞_{c}(2910)$ and $螞_{c}(2940)$ are considered as the $N D^{\ast}$ molecular s… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03277v1-abstract-full').style.display = 'inline'; document.getElementById('2502.03277v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.03277v1-abstract-full" style="display: none;"> In the present work, we propose to investigate the productions of $螞_{c}(2910)$ and $螞_{c}(2940)$ in the $蟺^{-} p \rightarrow D^{-} D^{0} p$ process. The cross sections and differential cross sections depending on the $D^0 p$ invariant mass spectrum are estimated by utilizing an effective Lagrangian approach, where both $螞_{c}(2910)$ and $螞_{c}(2940)$ are considered as the $N D^{\ast}$ molecular states with the $J^{P}=1/2^{-}$, $3/2^{-}$, respectively. Our estimations indicate that the total cross sections are $141.6^{-109.6}_{+207.7}~\mathrm{渭b}$ when $p_蟺=15~\mathrm{GeV}$, where the uncertainties result from the variation of the cutoff parameter $螞_{r}$. By comparing the contributions of the $s$, $u$, and $t$ channels, we find that the $t$ channel plays the predominant role. Moreover, the present estimations suggest that the structure around 2.9 GeV in the $D^0 p$ invariant mass spectrum of the $蟺^{-} p \rightarrow D^{-} D^{0} p$ process should correspond to $螞_c(2910)$ rather than $螞_c(2940)$, which can be tested by further experimental measurements at J-PARC in the future. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03277v1-abstract-full').style.display = 'none'; document.getElementById('2502.03277v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 4 figures, 1 table</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.02415">arXiv:2502.02415</a> <span> [<a href="https://arxiv.org/pdf/2502.02415">pdf</a>, <a href="https://arxiv.org/format/2502.02415">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Towards Fast Graph Generation via Autoregressive Noisy Filtration Modeling </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Krimmel%2C+M">Markus Krimmel</a>, <a href="/search/?searchtype=author&query=Wiens%2C+J">Jenna Wiens</a>, <a href="/search/?searchtype=author&query=Borgwardt%2C+K">Karsten Borgwardt</a>, <a href="/search/?searchtype=author&query=Chen%2C+D">Dexiong Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.02415v1-abstract-short" style="display: inline;"> Graph generative models often face a critical trade-off between learning complex distributions and achieving fast generation speed. We introduce Autoregressive Noisy Filtration Modeling (ANFM), a novel approach that addresses both challenges. ANFM leverages filtration, a concept from topological data analysis, to transform graphs into short sequences of monotonically increasing subgraphs. This for… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02415v1-abstract-full').style.display = 'inline'; document.getElementById('2502.02415v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.02415v1-abstract-full" style="display: none;"> Graph generative models often face a critical trade-off between learning complex distributions and achieving fast generation speed. We introduce Autoregressive Noisy Filtration Modeling (ANFM), a novel approach that addresses both challenges. ANFM leverages filtration, a concept from topological data analysis, to transform graphs into short sequences of monotonically increasing subgraphs. This formulation extends the sequence families used in previous autoregressive models. To learn from these sequences, we propose a novel autoregressive graph mixer model. Our experiments suggest that exposure bias might represent a substantial hurdle in autoregressive graph generation and we introduce two mitigation strategies to address it: noise augmentation and a reinforcement learning approach. Incorporating these techniques leads to substantial performance gains, making ANFM competitive with state-of-the-art diffusion models across diverse synthetic and real-world datasets. Notably, ANFM produces remarkably short sequences, achieving a 100-fold speedup in generation time compared to diffusion models. This work marks a significant step toward high-throughput graph generation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02415v1-abstract-full').style.display = 'none'; document.getElementById('2502.02415v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">32 pages, 27 tables, 6 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.02277">arXiv:2502.02277</a> <span> [<a href="https://arxiv.org/pdf/2502.02277">pdf</a>, <a href="https://arxiv.org/format/2502.02277">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Error Distribution Smoothing:Advancing Low-Dimensional Imbalanced Regression </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Chen%2C+D">Donghe Chen</a>, <a href="/search/?searchtype=author&query=Yue%2C+J">Jiaxuan Yue</a>, <a href="/search/?searchtype=author&query=Zheng%2C+T">Tengjie Zheng</a>, <a href="/search/?searchtype=author&query=Wang%2C+L">Lanxuan Wang</a>, <a href="/search/?searchtype=author&query=Cheng%2C+L">Lin Cheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.02277v1-abstract-short" style="display: inline;"> In real-world regression tasks, datasets frequently exhibit imbalanced distributions, characterized by a scarcity of data in high-complexity regions and an abundance in low-complexity areas. This imbalance presents significant challenges for existing classification methods with clear class boundaries, while highlighting a scarcity of approaches specifically designed for imbalanced regression probl… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02277v1-abstract-full').style.display = 'inline'; document.getElementById('2502.02277v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.02277v1-abstract-full" style="display: none;"> In real-world regression tasks, datasets frequently exhibit imbalanced distributions, characterized by a scarcity of data in high-complexity regions and an abundance in low-complexity areas. This imbalance presents significant challenges for existing classification methods with clear class boundaries, while highlighting a scarcity of approaches specifically designed for imbalanced regression problems. To better address these issues, we introduce a novel concept of Imbalanced Regression, which takes into account both the complexity of the problem and the density of data points, extending beyond traditional definitions that focus only on data density. Furthermore, we propose Error Distribution Smoothing (EDS) as a solution to tackle imbalanced regression, effectively selecting a representative subset from the dataset to reduce redundancy while maintaining balance and representativeness. Through several experiments, EDS has shown its effectiveness, and the related code and dataset can be accessed at https://anonymous.4open.science/r/Error-Distribution-Smoothing-762F. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02277v1-abstract-full').style.display = 'none'; document.getElementById('2502.02277v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages, 12 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.02265">arXiv:2502.02265</a> <span> [<a href="https://arxiv.org/pdf/2502.02265">pdf</a>, <a href="https://arxiv.org/format/2502.02265">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Adviser-Actor-Critic: Eliminating Steady-State Error in Reinforcement Learning Control </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Chen%2C+D">Donghe Chen</a>, <a href="/search/?searchtype=author&query=Peng%2C+Y">Yubin Peng</a>, <a href="/search/?searchtype=author&query=Zheng%2C+T">Tengjie Zheng</a>, <a href="/search/?searchtype=author&query=Wang%2C+H">Han Wang</a>, <a href="/search/?searchtype=author&query=Qu%2C+C">Chaoran Qu</a>, <a href="/search/?searchtype=author&query=Cheng%2C+L">Lin Cheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.02265v1-abstract-short" style="display: inline;"> High-precision control tasks present substantial challenges for reinforcement learning (RL) algorithms, frequently resulting in suboptimal performance attributed to network approximation inaccuracies and inadequate sample quality.These issues are exacerbated when the task requires the agent to achieve a precise goal state, as is common in robotics and other real-world applications.We introduce Adv… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02265v1-abstract-full').style.display = 'inline'; document.getElementById('2502.02265v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.02265v1-abstract-full" style="display: none;"> High-precision control tasks present substantial challenges for reinforcement learning (RL) algorithms, frequently resulting in suboptimal performance attributed to network approximation inaccuracies and inadequate sample quality.These issues are exacerbated when the task requires the agent to achieve a precise goal state, as is common in robotics and other real-world applications.We introduce Adviser-Actor-Critic (AAC), designed to address the precision control dilemma by combining the precision of feedback control theory with the adaptive learning capability of RL and featuring an Adviser that mentors the actor to refine control actions, thereby enhancing the precision of goal attainment.Finally, through benchmark tests, AAC outperformed standard RL algorithms in precision-critical, goal-conditioned tasks, demonstrating AAC's high precision, reliability, and robustness.Code are available at: https://anonymous.4open.science/r/Adviser-Actor-Critic-8AC5. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02265v1-abstract-full').style.display = 'none'; document.getElementById('2502.02265v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages, 9 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.02216">arXiv:2502.02216</a> <span> [<a href="https://arxiv.org/pdf/2502.02216">pdf</a>, <a href="https://arxiv.org/format/2502.02216">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Flatten Graphs as Sequences: Transformers are Scalable Graph Generators </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Chen%2C+D">Dexiong Chen</a>, <a href="/search/?searchtype=author&query=Krimmel%2C+M">Markus Krimmel</a>, <a href="/search/?searchtype=author&query=Borgwardt%2C+K">Karsten Borgwardt</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.02216v1-abstract-short" style="display: inline;"> We introduce AutoGraph, a novel autoregressive framework for generating large attributed graphs using decoder-only transformers. At the core of our approach is a reversible "flattening" process that transforms graphs into random sequences. By sampling and learning from these sequences, AutoGraph enables transformers to model and generate complex graph structures in a manner akin to natural languag… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02216v1-abstract-full').style.display = 'inline'; document.getElementById('2502.02216v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.02216v1-abstract-full" style="display: none;"> We introduce AutoGraph, a novel autoregressive framework for generating large attributed graphs using decoder-only transformers. At the core of our approach is a reversible "flattening" process that transforms graphs into random sequences. By sampling and learning from these sequences, AutoGraph enables transformers to model and generate complex graph structures in a manner akin to natural language. In contrast to diffusion models that rely on computationally intensive node features, our approach operates exclusively on these sequences. The sampling complexity and sequence length scale linearly with the number of edges, making AutoGraph highly scalable for generating large sparse graphs. Empirically, AutoGraph achieves state-of-the-art performance across diverse synthetic and molecular graph generation benchmarks, while delivering a 100-fold generation and a 3-fold training speedup compared to leading diffusion models. Additionally, it demonstrates promising transfer capabilities and supports substructure-conditioned generation without additional fine-tuning. By extending language modeling techniques to graph generation, this work paves the way for developing graph foundation models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02216v1-abstract-full').style.display = 'none'; document.getElementById('2502.02216v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.01387">arXiv:2502.01387</a> <span> [<a href="https://arxiv.org/pdf/2502.01387">pdf</a>, <a href="https://arxiv.org/format/2502.01387">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> TeLL-Drive: Enhancing Autonomous Driving with Teacher LLM-Guided Deep Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Xu%2C+C">Chengkai Xu</a>, <a href="/search/?searchtype=author&query=Liu%2C+J">Jiaqi Liu</a>, <a href="/search/?searchtype=author&query=Fang%2C+S">Shiyu Fang</a>, <a href="/search/?searchtype=author&query=Cui%2C+Y">Yiming Cui</a>, <a href="/search/?searchtype=author&query=Chen%2C+D">Dong Chen</a>, <a href="/search/?searchtype=author&query=Hang%2C+P">Peng Hang</a>, <a href="/search/?searchtype=author&query=Sun%2C+J">Jian Sun</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.01387v3-abstract-short" style="display: inline;"> Although Deep Reinforcement Learning (DRL) and Large Language Models (LLMs) each show promise in addressing decision-making challenges in autonomous driving, DRL often suffers from high sample complexity, while LLMs have difficulty ensuring real-time decision making. To address these limitations, we propose TeLL-Drive, a hybrid framework that integrates a Teacher LLM to guide an attention-based St… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01387v3-abstract-full').style.display = 'inline'; document.getElementById('2502.01387v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.01387v3-abstract-full" style="display: none;"> Although Deep Reinforcement Learning (DRL) and Large Language Models (LLMs) each show promise in addressing decision-making challenges in autonomous driving, DRL often suffers from high sample complexity, while LLMs have difficulty ensuring real-time decision making. To address these limitations, we propose TeLL-Drive, a hybrid framework that integrates a Teacher LLM to guide an attention-based Student DRL policy. By incorporating risk metrics, historical scenario retrieval, and domain heuristics into context-rich prompts, the LLM produces high-level driving strategies through chain-of-thought reasoning. A self-attention mechanism then fuses these strategies with the DRL agent's exploration, accelerating policy convergence and boosting robustness across diverse driving conditions. The experimental results, evaluated across multiple traffic scenarios, show that TeLL-Drive outperforms existing baseline methods, including other LLM-based approaches, in terms of success rates, average returns, and real-time feasibility. Ablation studies underscore the importance of each model component, especially the synergy between the attention mechanism and LLM-driven guidance. Finally, we build a virtual-real fusion experimental platform to verify the real-time performance, robustness, and reliability of the algorithm running on real vehicles through vehicle-in-loop experiments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01387v3-abstract-full').style.display = 'none'; document.getElementById('2502.01387v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.01297">arXiv:2502.01297</a> <span> [<a href="https://arxiv.org/pdf/2502.01297">pdf</a>, <a href="https://arxiv.org/format/2502.01297">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> XR-VIO: High-precision Visual Inertial Odometry with Fast Initialization for XR Applications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Zhai%2C+S">Shangjin Zhai</a>, <a href="/search/?searchtype=author&query=Wang%2C+N">Nan Wang</a>, <a href="/search/?searchtype=author&query=Wang%2C+X">Xiaomeng Wang</a>, <a href="/search/?searchtype=author&query=Chen%2C+D">Danpeng Chen</a>, <a href="/search/?searchtype=author&query=Xie%2C+W">Weijian Xie</a>, <a href="/search/?searchtype=author&query=Bao%2C+H">Hujun Bao</a>, <a href="/search/?searchtype=author&query=Zhang%2C+G">Guofeng Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.01297v1-abstract-short" style="display: inline;"> This paper presents a novel approach to Visual Inertial Odometry (VIO), focusing on the initialization and feature matching modules. Existing methods for initialization often suffer from either poor stability in visual Structure from Motion (SfM) or fragility in solving a huge number of parameters simultaneously. To address these challenges, we propose a new pipeline for visual inertial initializa… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01297v1-abstract-full').style.display = 'inline'; document.getElementById('2502.01297v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.01297v1-abstract-full" style="display: none;"> This paper presents a novel approach to Visual Inertial Odometry (VIO), focusing on the initialization and feature matching modules. Existing methods for initialization often suffer from either poor stability in visual Structure from Motion (SfM) or fragility in solving a huge number of parameters simultaneously. To address these challenges, we propose a new pipeline for visual inertial initialization that robustly handles various complex scenarios. By tightly coupling gyroscope measurements, we enhance the robustness and accuracy of visual SfM. Our method demonstrates stable performance even with only four image frames, yielding competitive results. In terms of feature matching, we introduce a hybrid method that combines optical flow and descriptor-based matching. By leveraging the robustness of continuous optical flow tracking and the accuracy of descriptor matching, our approach achieves efficient, accurate, and robust tracking results. Through evaluation on multiple benchmarks, our method demonstrates state-of-the-art performance in terms of accuracy and success rate. Additionally, a video demonstration on mobile devices showcases the practical applicability of our approach in the field of Augmented Reality/Virtual Reality (AR/VR). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01297v1-abstract-full').style.display = 'none'; document.getElementById('2502.01297v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.01105">arXiv:2502.01105</a> <span> [<a href="https://arxiv.org/pdf/2502.01105">pdf</a>, <a href="https://arxiv.org/format/2502.01105">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> LayerTracer: Cognitive-Aligned Layered SVG Synthesis via Diffusion Transformer </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Song%2C+Y">Yiren Song</a>, <a href="/search/?searchtype=author&query=Chen%2C+D">Danze Chen</a>, <a href="/search/?searchtype=author&query=Shou%2C+M+Z">Mike Zheng Shou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.01105v1-abstract-short" style="display: inline;"> Generating cognitive-aligned layered SVGs remains challenging due to existing methods' tendencies toward either oversimplified single-layer outputs or optimization-induced shape redundancies. We propose LayerTracer, a diffusion transformer based framework that bridges this gap by learning designers' layered SVG creation processes from a novel dataset of sequential design operations. Our approach o… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01105v1-abstract-full').style.display = 'inline'; document.getElementById('2502.01105v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.01105v1-abstract-full" style="display: none;"> Generating cognitive-aligned layered SVGs remains challenging due to existing methods' tendencies toward either oversimplified single-layer outputs or optimization-induced shape redundancies. We propose LayerTracer, a diffusion transformer based framework that bridges this gap by learning designers' layered SVG creation processes from a novel dataset of sequential design operations. Our approach operates in two phases: First, a text-conditioned DiT generates multi-phase rasterized construction blueprints that simulate human design workflows. Second, layer-wise vectorization with path deduplication produces clean, editable SVGs. For image vectorization, we introduce a conditional diffusion mechanism that encodes reference images into latent tokens, guiding hierarchical reconstruction while preserving structural integrity. Extensive experiments demonstrate LayerTracer's superior performance against optimization-based and neural baselines in both generation quality and editability, effectively aligning AI-generated vectors with professional design cognition. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01105v1-abstract-full').style.display = 'none'; document.getElementById('2502.01105v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00442">arXiv:2502.00442</a> <span> [<a href="https://arxiv.org/pdf/2502.00442">pdf</a>, <a href="https://arxiv.org/format/2502.00442">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Earth and Planetary Astrophysics">astro-ph.EP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Solar and Stellar Astrophysics">astro-ph.SR</span> </div> </div> <p class="title is-5 mathjax"> Short-Period Small Planets with High Mutual Inclinations are more Common around Metal-Rich Stars </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Hua%2C+X">Xinyan Hua</a>, <a href="/search/?searchtype=author&query=Wang%2C+S+X">Sharon Xuesong Wang</a>, <a href="/search/?searchtype=author&query=An%2C+D">Dongsheng An</a>, <a href="/search/?searchtype=author&query=Wang%2C+S">Songhu Wang</a>, <a href="/search/?searchtype=author&query=Huang%2C+Y">Yang Huang</a>, <a href="/search/?searchtype=author&query=Chen%2C+D">Dichang Chen</a>, <a href="/search/?searchtype=author&query=Buchner%2C+J">Johannes Buchner</a>, <a href="/search/?searchtype=author&query=Zhu%2C+W">Wei Zhu</a>, <a href="/search/?searchtype=author&query=Dai%2C+F">Fei Dai</a>, <a href="/search/?searchtype=author&query=Xie%2C+J">Jiwei Xie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.00442v2-abstract-short" style="display: inline;"> We present a correlation between the stellar metallicities and the mutual inclinations of multi-planet systems hosting short-period small planets (a/Rs<12, Rp<4Re). We analyzed 89 multi-planet systems discovered by Kepler, K2, and TESS, where the innermost planets have periods shorter than 10 days. We found that the mutual inclinations of the innermost two planets are higher and more diverse aroun… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00442v2-abstract-full').style.display = 'inline'; document.getElementById('2502.00442v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.00442v2-abstract-full" style="display: none;"> We present a correlation between the stellar metallicities and the mutual inclinations of multi-planet systems hosting short-period small planets (a/Rs<12, Rp<4Re). We analyzed 89 multi-planet systems discovered by Kepler, K2, and TESS, where the innermost planets have periods shorter than 10 days. We found that the mutual inclinations of the innermost two planets are higher and more diverse around metal-rich stars. The mutual inclinations are calculated as the absolute differences between the best-fit inclinations of the innermost two planets from transit modeling, which represent the lower limits of the true mutual inclinations. The mean and variance of the mutual inclination distribution of the metal-rich systems are 3.1+-0.5 and 3.1+-0.4 degrees, while for the metal-poor systems they are 1.3+-0.2 and 1.0+-0.2 degrees. This finding suggests that inner planetary systems around metal-rich stars are dynamically hotter. We summarized the theories that could plausibly explain this correlation, including the influence of giant planets, higher solid densities in protoplanetary disks around metal-rich stars, or secular chaos coupled with an excess of angular momentum deficits. Planet formation and population synthesis models tracking the mutual inclination evolution would be essential to fully understand this correlation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00442v2-abstract-full').style.display = 'none'; document.getElementById('2502.00442v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages, 5 figures, 1 table. Accepted by ApJL</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00262">arXiv:2502.00262</a> <span> [<a href="https://arxiv.org/pdf/2502.00262">pdf</a>, <a href="https://arxiv.org/format/2502.00262">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> INSIGHT: Enhancing Autonomous Driving Safety through Vision-Language Models on Context-Aware Hazard Detection and Edge Case Evaluation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Chen%2C+D">Dianwei Chen</a>, <a href="/search/?searchtype=author&query=Zhang%2C+Z">Zifan Zhang</a>, <a href="/search/?searchtype=author&query=Liu%2C+Y">Yuchen Liu</a>, <a href="/search/?searchtype=author&query=Yang%2C+X+T">Xianfeng Terry Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.00262v2-abstract-short" style="display: inline;"> Autonomous driving systems face significant challenges in handling unpredictable edge-case scenarios, such as adversarial pedestrian movements, dangerous vehicle maneuvers, and sudden environmental changes. Current end-to-end driving models struggle with generalization to these rare events due to limitations in traditional detection and prediction approaches. To address this, we propose INSIGHT (I… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00262v2-abstract-full').style.display = 'inline'; document.getElementById('2502.00262v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.00262v2-abstract-full" style="display: none;"> Autonomous driving systems face significant challenges in handling unpredictable edge-case scenarios, such as adversarial pedestrian movements, dangerous vehicle maneuvers, and sudden environmental changes. Current end-to-end driving models struggle with generalization to these rare events due to limitations in traditional detection and prediction approaches. To address this, we propose INSIGHT (Integration of Semantic and Visual Inputs for Generalized Hazard Tracking), a hierarchical vision-language model (VLM) framework designed to enhance hazard detection and edge-case evaluation. By using multimodal data fusion, our approach integrates semantic and visual representations, enabling precise interpretation of driving scenarios and accurate forecasting of potential dangers. Through supervised fine-tuning of VLMs, we optimize spatial hazard localization using attention-based mechanisms and coordinate regression techniques. Experimental results on the BDD100K dataset demonstrate a substantial improvement in hazard prediction straightforwardness and accuracy over existing models, achieving a notable increase in generalization performance. This advancement enhances the robustness and safety of autonomous driving systems, ensuring improved situational awareness and potential decision-making in complex real-world scenarios. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00262v2-abstract-full').style.display = 'none'; document.getElementById('2502.00262v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 31 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.18804">arXiv:2501.18804</a> <span> [<a href="https://arxiv.org/pdf/2501.18804">pdf</a>, <a href="https://arxiv.org/format/2501.18804">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Zero-Shot Novel View and Depth Synthesis with Multi-View Geometric Diffusion </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Guizilini%2C+V">Vitor Guizilini</a>, <a href="/search/?searchtype=author&query=Irshad%2C+M+Z">Muhammad Zubair Irshad</a>, <a href="/search/?searchtype=author&query=Chen%2C+D">Dian Chen</a>, <a href="/search/?searchtype=author&query=Shakhnarovich%2C+G">Greg Shakhnarovich</a>, <a href="/search/?searchtype=author&query=Ambrus%2C+R">Rares Ambrus</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.18804v1-abstract-short" style="display: inline;"> Current methods for 3D scene reconstruction from sparse posed images employ intermediate 3D representations such as neural fields, voxel grids, or 3D Gaussians, to achieve multi-view consistent scene appearance and geometry. In this paper we introduce MVGD, a diffusion-based architecture capable of direct pixel-level generation of images and depth maps from novel viewpoints, given an arbitrary num… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18804v1-abstract-full').style.display = 'inline'; document.getElementById('2501.18804v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.18804v1-abstract-full" style="display: none;"> Current methods for 3D scene reconstruction from sparse posed images employ intermediate 3D representations such as neural fields, voxel grids, or 3D Gaussians, to achieve multi-view consistent scene appearance and geometry. In this paper we introduce MVGD, a diffusion-based architecture capable of direct pixel-level generation of images and depth maps from novel viewpoints, given an arbitrary number of input views. Our method uses raymap conditioning to both augment visual features with spatial information from different viewpoints, as well as to guide the generation of images and depth maps from novel views. A key aspect of our approach is the multi-task generation of images and depth maps, using learnable task embeddings to guide the diffusion process towards specific modalities. We train this model on a collection of more than 60 million multi-view samples from publicly available datasets, and propose techniques to enable efficient and consistent learning in such diverse conditions. We also propose a novel strategy that enables the efficient training of larger models by incrementally fine-tuning smaller ones, with promising scaling behavior. Through extensive experiments, we report state-of-the-art results in multiple novel view synthesis benchmarks, as well as multi-view stereo and video depth estimation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18804v1-abstract-full').style.display = 'none'; document.getElementById('2501.18804v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Project page: https://mvgd.github.io</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.18672">arXiv:2501.18672</a> <span> [<a href="https://arxiv.org/pdf/2501.18672">pdf</a>, <a href="https://arxiv.org/format/2501.18672">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Drag Your Gaussian: Effective Drag-Based Editing with Score Distillation for 3D Gaussian Splatting </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Qu%2C+Y">Yansong Qu</a>, <a href="/search/?searchtype=author&query=Chen%2C+D">Dian Chen</a>, <a href="/search/?searchtype=author&query=Li%2C+X">Xinyang Li</a>, <a href="/search/?searchtype=author&query=Li%2C+X">Xiaofan Li</a>, <a href="/search/?searchtype=author&query=Zhang%2C+S">Shengchuan Zhang</a>, <a href="/search/?searchtype=author&query=Cao%2C+L">Liujuan Cao</a>, <a href="/search/?searchtype=author&query=Ji%2C+R">Rongrong Ji</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.18672v2-abstract-short" style="display: inline;"> Recent advancements in 3D scene editing have been propelled by the rapid development of generative models. Existing methods typically utilize generative models to perform text-guided editing on 3D representations, such as 3D Gaussian Splatting (3DGS). However, these methods are often limited to texture modifications and fail when addressing geometric changes, such as editing a character's head to… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18672v2-abstract-full').style.display = 'inline'; document.getElementById('2501.18672v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.18672v2-abstract-full" style="display: none;"> Recent advancements in 3D scene editing have been propelled by the rapid development of generative models. Existing methods typically utilize generative models to perform text-guided editing on 3D representations, such as 3D Gaussian Splatting (3DGS). However, these methods are often limited to texture modifications and fail when addressing geometric changes, such as editing a character's head to turn around. Moreover, such methods lack accurate control over the spatial position of editing results, as language struggles to precisely describe the extent of edits. To overcome these limitations, we introduce DYG, an effective 3D drag-based editing method for 3D Gaussian Splatting. It enables users to conveniently specify the desired editing region and the desired dragging direction through the input of 3D masks and pairs of control points, thereby enabling precise control over the extent of editing. DYG integrates the strengths of the implicit triplane representation to establish the geometric scaffold of the editing results, effectively overcoming suboptimal editing outcomes caused by the sparsity of 3DGS in the desired editing regions. Additionally, we incorporate a drag-based Latent Diffusion Model into our method through the proposed Drag-SDS loss function, enabling flexible, multi-view consistent, and fine-grained editing. Extensive experiments demonstrate that DYG conducts effective drag-based editing guided by control point prompts, surpassing other baselines in terms of editing effect and quality, both qualitatively and quantitatively. Visit our project page at https://quyans.github.io/Drag-Your-Gaussian. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18672v2-abstract-full').style.display = 'none'; document.getElementById('2501.18672v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 30 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Visit our project page at https://quyans.github.io/Drag-Your-Gaussian</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.16999">arXiv:2501.16999</a> <span> [<a href="https://arxiv.org/pdf/2501.16999">pdf</a>, <a href="https://arxiv.org/format/2501.16999">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="High Energy Physics - Theory">hep-th</span> </div> </div> <p class="title is-5 mathjax"> Lyapunov exponents as probes for phase transitions of Kerr-AdS black holes </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Chen%2C+D">Deyou Chen</a>, <a href="/search/?searchtype=author&query=Yang%2C+C">Chuang Yang</a>, <a href="/search/?searchtype=author&query=Liu%2C+Y">Yongtao Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.16999v3-abstract-short" style="display: inline;"> In this paper, we study proper time Lyapunov exponents and coordinate time Lyapunov exponents of chaos for both massless and massive particles orbiting four-dimensional and five-dimensional Kerr-AdS black holes, and explore their relationships with phase transitions of these black holes. The results reveal that these exponents can reflect the occurrence of phase transitions. Specifically, when com… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.16999v3-abstract-full').style.display = 'inline'; document.getElementById('2501.16999v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.16999v3-abstract-full" style="display: none;"> In this paper, we study proper time Lyapunov exponents and coordinate time Lyapunov exponents of chaos for both massless and massive particles orbiting four-dimensional and five-dimensional Kerr-AdS black holes, and explore their relationships with phase transitions of these black holes. The results reveal that these exponents can reflect the occurrence of phase transitions. Specifically, when compared to the Lyapunov exponents of massive particles in chaotic states, the exponents corresponding to massless particles demonstrate a more robust capability in describing the phase transitions. Furthermore, we conduct a study on critical exponents associated with the Lyapunov exponents in these black holes, identifying a critical exponent value of 1/2. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.16999v3-abstract-full').style.display = 'none'; document.getElementById('2501.16999v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">24 pages, 10 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.16665">arXiv:2501.16665</a> <span> [<a href="https://arxiv.org/pdf/2501.16665">pdf</a>, <a href="https://arxiv.org/format/2501.16665">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> CSPCL: Category Semantic Prior Contrastive Learning for Deformable DETR-Based Prohibited Item Detectors </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Li%2C+M">Mingyuan Li</a>, <a href="/search/?searchtype=author&query=Jia%2C+T">Tong Jia</a>, <a href="/search/?searchtype=author&query=Lu%2C+H">Hui Lu</a>, <a href="/search/?searchtype=author&query=Ma%2C+B">Bowen Ma</a>, <a href="/search/?searchtype=author&query=Wang%2C+H">Hao Wang</a>, <a href="/search/?searchtype=author&query=Chen%2C+D">Dongyue Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.16665v1-abstract-short" style="display: inline;"> Prohibited item detection based on X-ray images is one of the most effective security inspection methods. However, the foreground-background feature coupling caused by the overlapping phenomenon specific to X-ray images makes general detectors designed for natural images perform poorly. To address this issue, we propose a Category Semantic Prior Contrastive Learning (CSPCL) mechanism, which aligns… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.16665v1-abstract-full').style.display = 'inline'; document.getElementById('2501.16665v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.16665v1-abstract-full" style="display: none;"> Prohibited item detection based on X-ray images is one of the most effective security inspection methods. However, the foreground-background feature coupling caused by the overlapping phenomenon specific to X-ray images makes general detectors designed for natural images perform poorly. To address this issue, we propose a Category Semantic Prior Contrastive Learning (CSPCL) mechanism, which aligns the class prototypes perceived by the classifier with the content queries to correct and supplement the missing semantic information responsible for classification, thereby enhancing the model sensitivity to foreground features.To achieve this alignment, we design a specific contrastive loss, CSP loss, which includes Intra-Class Truncated Attraction (ITA) loss and Inter-Class Adaptive Repulsion (IAR) loss, and outperforms classic N-pair loss and InfoNCE loss. Specifically, ITA loss leverages class prototypes to attract intra-class category-specific content queries while preserving necessary distinctiveness. IAR loss utilizes class prototypes to adaptively repel inter-class category-specific content queries based on the similarity between class prototypes, helping disentangle features of similar categories.CSPCL is general and can be easily integrated into Deformable DETR-based models. Extensive experiments on the PIXray and OPIXray datasets demonstrate that CSPCL significantly enhances the performance of various state-of-the-art models without increasing complexity.The code will be open source once the paper is accepted. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.16665v1-abstract-full').style.display = 'none'; document.getElementById('2501.16665v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.16124">arXiv:2501.16124</a> <span> [<a href="https://arxiv.org/pdf/2501.16124">pdf</a>, <a href="https://arxiv.org/format/2501.16124">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="High Energy Physics - Phenomenology">hep-ph</span> </div> </div> <p class="title is-5 mathjax"> Two-body Hidden Charm Decays of $D$ Wave Charmonia </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Qi%2C+X">Xiao-Yu Qi</a>, <a href="/search/?searchtype=author&query=Wu%2C+Q">Qi Wu</a>, <a href="/search/?searchtype=author&query=Guo%2C+X">Xing-Dao Guo</a>, <a href="/search/?searchtype=author&query=Chen%2C+D">Dian-Yong Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.16124v1-abstract-short" style="display: inline;"> The experimental observations of $蠄_2(3823)$ and $蠄_3(3842)$ make $D$ wave charmonia family abundant. In the present work, we investigate the hidden charm decay processes of spin triplets of the $D$-wave charmonia with the meson loop mechanism. The model parameter $伪_螞$ is determined by reproducing the branching fraction of $蠄(3770)\to J/蠄畏$. With this range of model parameter values, the branchin… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.16124v1-abstract-full').style.display = 'inline'; document.getElementById('2501.16124v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.16124v1-abstract-full" style="display: none;"> The experimental observations of $蠄_2(3823)$ and $蠄_3(3842)$ make $D$ wave charmonia family abundant. In the present work, we investigate the hidden charm decay processes of spin triplets of the $D$-wave charmonia with the meson loop mechanism. The model parameter $伪_螞$ is determined by reproducing the branching fraction of $蠄(3770)\to J/蠄畏$. With this range of model parameter values, the branching fractions (partial widths) of $蠄(3770) \to 畏_c 蠅$, $蠄_2(3823)/蠄_3(3842) \to J/蠄畏$, $蠄_2(3823)/蠄_3(3842) \to 畏_c 蠅$ are estimated. Our estimations find that the partial width of $蠄_2(3823) \to J/蠄畏$ is $\left(29.64^{+4.01}_{-4.63}\right)\ \mathrm{keV}$, and the partial width ratio of $蠄_2(3823) \to J/蠄畏$ relative to $蠄_2(3823)\to 纬蠂_{c1}$ is about $10\%$, which could be tested by further precise measurements from the BESIII, Belle II and LHCb Collaborations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.16124v1-abstract-full').style.display = 'none'; document.getElementById('2501.16124v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 9 figures. Accepted for publication in The European Physical Journal C</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.15770">arXiv:2501.15770</a> <span> [<a href="https://arxiv.org/pdf/2501.15770">pdf</a>, <a href="https://arxiv.org/format/2501.15770">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Walk in Their Shoes to Navigate Your Own Path: Learning About Procrastination Through A Serious Game </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Zhang%2C+R">Runhua Zhang</a>, <a href="/search/?searchtype=author&query=Gan%2C+J">Jiaqi Gan</a>, <a href="/search/?searchtype=author&query=Gao%2C+S">Shangyuan Gao</a>, <a href="/search/?searchtype=author&query=Chen%2C+S">Siyi Chen</a>, <a href="/search/?searchtype=author&query=Wu%2C+X">Xinyu Wu</a>, <a href="/search/?searchtype=author&query=Chen%2C+D">Dong Chen</a>, <a href="/search/?searchtype=author&query=Tian%2C+Y">Yulin Tian</a>, <a href="/search/?searchtype=author&query=Wang%2C+Q">Qi Wang</a>, <a href="/search/?searchtype=author&query=An%2C+P">Pengcheng An</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.15770v1-abstract-short" style="display: inline;"> Procrastination, the voluntary delay of tasks despite potential negative consequences, has prompted numerous time and task management interventions in the HCI community. While these interventions have shown promise in addressing specific behaviors, psychological theories suggest that learning about procrastination itself may help individuals develop their own coping strategies and build mental res… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15770v1-abstract-full').style.display = 'inline'; document.getElementById('2501.15770v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.15770v1-abstract-full" style="display: none;"> Procrastination, the voluntary delay of tasks despite potential negative consequences, has prompted numerous time and task management interventions in the HCI community. While these interventions have shown promise in addressing specific behaviors, psychological theories suggest that learning about procrastination itself may help individuals develop their own coping strategies and build mental resilience. However, little research has explored how to support this learning process through HCI approaches. We present ProcrastiMate, a text adventure game where players learn about procrastination's causes and experiment with coping strategies by guiding in-game characters in managing relatable scenarios. Our field study with 27 participants revealed that ProcrastiMate facilitated learning and self-reflection while maintaining psychological distance, motivating players to integrate newly acquired knowledge in daily life. This paper contributes empirical insights on leveraging serious games to facilitate learning about procrastination and offers design implications for addressing psychological challenges through HCI approaches. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15770v1-abstract-full').style.display = 'none'; document.getElementById('2501.15770v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.15534">arXiv:2501.15534</a> <span> [<a href="https://arxiv.org/pdf/2501.15534">pdf</a>, <a href="https://arxiv.org/ps/2501.15534">ps</a>, <a href="https://arxiv.org/format/2501.15534">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="High Energy Physics - Phenomenology">hep-ph</span> </div> </div> <p class="title is-5 mathjax"> Systematic analysis of the form factors of $B_{c}$ to $P$-wave charmonia and corresponding weak decays </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Lu%2C+J">Jie Lu</a>, <a href="/search/?searchtype=author&query=Chen%2C+D">Dian-Yong Chen</a>, <a href="/search/?searchtype=author&query=Yu%2C+G">Guo-Liang Yu</a>, <a href="/search/?searchtype=author&query=Wang%2C+Z">Zhi-Gang Wang</a>, <a href="/search/?searchtype=author&query=Wu%2C+B">Bin Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.15534v1-abstract-short" style="display: inline;"> In this article, the vector, axial vector and tensor form factors of $B_{c}\to 蠂_{cJ}$ ($J=0,1,2$) and $B_{c}\to h_{c}$ are analyzed within the framework of three-point QCD sum rules. With the calculated vector and axial vector form factors, we directly study the decay widths and branching ratios of semileptonic decays $B_{c}^{-}\to 蠂_{cJ}l \bar谓_l, h_{c}l \bar谓_l$ $(l=e, 渭$ and $蟿)$ and analyze t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15534v1-abstract-full').style.display = 'inline'; document.getElementById('2501.15534v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.15534v1-abstract-full" style="display: none;"> In this article, the vector, axial vector and tensor form factors of $B_{c}\to 蠂_{cJ}$ ($J=0,1,2$) and $B_{c}\to h_{c}$ are analyzed within the framework of three-point QCD sum rules. With the calculated vector and axial vector form factors, we directly study the decay widths and branching ratios of semileptonic decays $B_{c}^{-}\to 蠂_{cJ}l \bar谓_l, h_{c}l \bar谓_l$ $(l=e, 渭$ and $蟿)$ and analyze the nonleptonic decays $B_{c}^{-}\to 蠂_{cJ}蟺^{-}, 蠂_{cJ}K^{-}, 蠂_{cJ}蟻^{-}, 蠂_{cJ}K^{*-}$, $B_{c}^{-}\to h_{c}蟺^{-}, h_{c}K^{-}, h_{c}蟻^{-}, h_{c}K^{*-}$ by using the naive factorization approach (NFA). These results can provide more information to understand the properties of $B_{c}$ meson and $P$-wave charmonia and to study the heavy quark dynamics. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15534v1-abstract-full').style.display = 'none'; document.getElementById('2501.15534v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.15062">arXiv:2501.15062</a> <span> [<a href="https://arxiv.org/pdf/2501.15062">pdf</a>, <a href="https://arxiv.org/format/2501.15062">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Exact Fit Attention in Node-Holistic Graph Convolutional Network for Improved EEG-Based Driver Fatigue Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Xu%2C+M">Meiyan Xu</a>, <a href="/search/?searchtype=author&query=Chen%2C+Q">Qingqing Chen</a>, <a href="/search/?searchtype=author&query=Chen%2C+D">Duo Chen</a>, <a href="/search/?searchtype=author&query=Ding%2C+Y">Yi Ding</a>, <a href="/search/?searchtype=author&query=Wang%2C+J">Jingyuan Wang</a>, <a href="/search/?searchtype=author&query=Gu%2C+P">Peipei Gu</a>, <a href="/search/?searchtype=author&query=Pan%2C+Y">Yijie Pan</a>, <a href="/search/?searchtype=author&query=Huang%2C+D">Deshuang Huang</a>, <a href="/search/?searchtype=author&query=Zhang%2C+X">Xun Zhang</a>, <a href="/search/?searchtype=author&query=Guo%2C+J">Jiayang Guo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.15062v1-abstract-short" style="display: inline;"> EEG-based fatigue monitoring can effectively reduce the incidence of related traffic accidents. In the past decade, with the advancement of deep learning, convolutional neural networks (CNN) have been increasingly used for EEG signal processing. However, due to the data's non-Euclidean characteristics, existing CNNs may lose important spatial information from EEG, specifically channel correlation.… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15062v1-abstract-full').style.display = 'inline'; document.getElementById('2501.15062v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.15062v1-abstract-full" style="display: none;"> EEG-based fatigue monitoring can effectively reduce the incidence of related traffic accidents. In the past decade, with the advancement of deep learning, convolutional neural networks (CNN) have been increasingly used for EEG signal processing. However, due to the data's non-Euclidean characteristics, existing CNNs may lose important spatial information from EEG, specifically channel correlation. Thus, we propose the node-holistic graph convolutional network (NHGNet), a model that uses graphic convolution to dynamically learn each channel's features. With exact fit attention optimization, the network captures inter-channel correlations through a trainable adjacency matrix. The interpretability is enhanced by revealing critical areas of brain activity and their interrelations in various mental states. In validations on two public datasets, NHGNet outperforms the SOTAs. Specifically, in the intra-subject, NHGNet improved detection accuracy by at least 2.34% and 3.42%, and in the inter-subjects, it improved by at least 2.09% and 15.06%. Visualization research on the model revealed that the central parietal area plays an important role in detecting fatigue levels, whereas the frontal and temporal lobes are essential for maintaining vigilance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15062v1-abstract-full').style.display = 'none'; document.getElementById('2501.15062v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.14995">arXiv:2501.14995</a> <span> [<a href="https://arxiv.org/pdf/2501.14995">pdf</a>, <a href="https://arxiv.org/format/2501.14995">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> GreenAuto: An Automated Platform for Sustainable AI Model Design on Edge Devices </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Tu%2C+X">Xiaolong Tu</a>, <a href="/search/?searchtype=author&query=Chen%2C+D">Dawei Chen</a>, <a href="/search/?searchtype=author&query=Han%2C+K">Kyungtae Han</a>, <a href="/search/?searchtype=author&query=Altintas%2C+O">Onur Altintas</a>, <a href="/search/?searchtype=author&query=Wang%2C+H">Haoxin Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.14995v1-abstract-short" style="display: inline;"> We present GreenAuto, an end-to-end automated platform designed for sustainable AI model exploration, generation, deployment, and evaluation. GreenAuto employs a Pareto front-based search method within an expanded neural architecture search (NAS) space, guided by gradient descent to optimize model exploration. Pre-trained kernel-level energy predictors estimate energy consumption across all models… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14995v1-abstract-full').style.display = 'inline'; document.getElementById('2501.14995v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.14995v1-abstract-full" style="display: none;"> We present GreenAuto, an end-to-end automated platform designed for sustainable AI model exploration, generation, deployment, and evaluation. GreenAuto employs a Pareto front-based search method within an expanded neural architecture search (NAS) space, guided by gradient descent to optimize model exploration. Pre-trained kernel-level energy predictors estimate energy consumption across all models, providing a global view that directs the search toward more sustainable solutions. By automating performance measurements and iteratively refining the search process, GreenAuto demonstrates the efficient identification of sustainable AI models without the need for human intervention. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14995v1-abstract-full').style.display = 'none'; document.getElementById('2501.14995v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.14755">arXiv:2501.14755</a> <span> [<a href="https://arxiv.org/pdf/2501.14755">pdf</a>, <a href="https://arxiv.org/format/2501.14755">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Data-Juicer 2.0: Cloud-Scale Adaptive Data Processing for Foundation Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Chen%2C+D">Daoyuan Chen</a>, <a href="/search/?searchtype=author&query=Huang%2C+Y">Yilun Huang</a>, <a href="/search/?searchtype=author&query=Pan%2C+X">Xuchen Pan</a>, <a href="/search/?searchtype=author&query=Jiang%2C+N">Nana Jiang</a>, <a href="/search/?searchtype=author&query=Wang%2C+H">Haibin Wang</a>, <a href="/search/?searchtype=author&query=Ge%2C+C">Ce Ge</a>, <a href="/search/?searchtype=author&query=Chen%2C+Y">Yushuo Chen</a>, <a href="/search/?searchtype=author&query=Zhang%2C+W">Wenhao Zhang</a>, <a href="/search/?searchtype=author&query=Ma%2C+Z">Zhijian Ma</a>, <a href="/search/?searchtype=author&query=Zhang%2C+Y">Yilei Zhang</a>, <a href="/search/?searchtype=author&query=Huang%2C+J">Jun Huang</a>, <a href="/search/?searchtype=author&query=Lin%2C+W">Wei Lin</a>, <a href="/search/?searchtype=author&query=Li%2C+Y">Yaliang Li</a>, <a href="/search/?searchtype=author&query=Ding%2C+B">Bolin Ding</a>, <a href="/search/?searchtype=author&query=Zhou%2C+J">Jingren Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.14755v1-abstract-short" style="display: inline;"> The burgeoning field of foundation models necessitates advanced data processing mechanisms capable of harnessing vast valuable data with varied types utilized by these models. Nevertheless, the current landscape presents unique challenges that traditional data processing frameworks cannot handle effectively, especially with multimodal intricacies. In response, we present Data-Juicer 2.0, a new sys… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14755v1-abstract-full').style.display = 'inline'; document.getElementById('2501.14755v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.14755v1-abstract-full" style="display: none;"> The burgeoning field of foundation models necessitates advanced data processing mechanisms capable of harnessing vast valuable data with varied types utilized by these models. Nevertheless, the current landscape presents unique challenges that traditional data processing frameworks cannot handle effectively, especially with multimodal intricacies. In response, we present Data-Juicer 2.0, a new system offering fruitful data processing capabilities backed by over a hundred operators spanning various modalities like text, image, audio, and video. With seamless compatibility and dedicated optimization to popular dataset hubs like Hugging Face and computing engines like Ray, Data-Juicer 2.0 enhances its predecessor in both usability, efficiency, and programmability. It features an easily accessible user interface layer that supports decoupled Python interactions, RESTful APIs, and conversational commands. Alongside this, it contains a core runtime layer optimized for adaptive execution and management across different dataset scales, processing demands, and computational environments, while shielding unnecessary system details. Extensive empirical evaluations demonstrate Data-Juicer 2.0's remarkable performance and scalability, highlighting its capability to efficiently process tens of billions of data samples with tens of thousands of CPU cores. The system is publicly available, actively maintained, and broadly adopted in diverse research endeavors, practical applications, and real-world products such as Alibaba Cloud PAI. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14755v1-abstract-full').style.display = 'none'; document.getElementById('2501.14755v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages, 9 figures, 3 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.13949">arXiv:2501.13949</a> <span> [<a href="https://arxiv.org/pdf/2501.13949">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Can OpenAI o1 Reason Well in Ophthalmology? A 6,990-Question Head-to-Head Evaluation Study </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Srinivasan%2C+S">Sahana Srinivasan</a>, <a href="/search/?searchtype=author&query=Ai%2C+X">Xuguang Ai</a>, <a href="/search/?searchtype=author&query=Zou%2C+M">Minjie Zou</a>, <a href="/search/?searchtype=author&query=Zou%2C+K">Ke Zou</a>, <a href="/search/?searchtype=author&query=Kim%2C+H">Hyunjae Kim</a>, <a href="/search/?searchtype=author&query=Lo%2C+T+W+S">Thaddaeus Wai Soon Lo</a>, <a href="/search/?searchtype=author&query=Pushpanathan%2C+K">Krithi Pushpanathan</a>, <a href="/search/?searchtype=author&query=Kong%2C+Y">Yiming Kong</a>, <a href="/search/?searchtype=author&query=Li%2C+A">Anran Li</a>, <a href="/search/?searchtype=author&query=Singer%2C+M">Maxwell Singer</a>, <a href="/search/?searchtype=author&query=Jin%2C+K">Kai Jin</a>, <a href="/search/?searchtype=author&query=Antaki%2C+F">Fares Antaki</a>, <a href="/search/?searchtype=author&query=Chen%2C+D+Z">David Ziyou Chen</a>, <a href="/search/?searchtype=author&query=Liu%2C+D">Dianbo Liu</a>, <a href="/search/?searchtype=author&query=Adelman%2C+R+A">Ron A. Adelman</a>, <a href="/search/?searchtype=author&query=Chen%2C+Q">Qingyu Chen</a>, <a href="/search/?searchtype=author&query=Tham%2C+Y+C">Yih Chung Tham</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.13949v1-abstract-short" style="display: inline;"> Question: What is the performance and reasoning ability of OpenAI o1 compared to other large language models in addressing ophthalmology-specific questions? Findings: This study evaluated OpenAI o1 and five LLMs using 6,990 ophthalmological questions from MedMCQA. O1 achieved the highest accuracy (0.88) and macro-F1 score but ranked third in reasoning capabilities based on text-generation metric… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.13949v1-abstract-full').style.display = 'inline'; document.getElementById('2501.13949v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.13949v1-abstract-full" style="display: none;"> Question: What is the performance and reasoning ability of OpenAI o1 compared to other large language models in addressing ophthalmology-specific questions? Findings: This study evaluated OpenAI o1 and five LLMs using 6,990 ophthalmological questions from MedMCQA. O1 achieved the highest accuracy (0.88) and macro-F1 score but ranked third in reasoning capabilities based on text-generation metrics. Across subtopics, o1 ranked first in ``Lens'' and ``Glaucoma'' but second to GPT-4o in ``Corneal and External Diseases'', ``Vitreous and Retina'' and ``Oculoplastic and Orbital Diseases''. Subgroup analyses showed o1 performed better on queries with longer ground truth explanations. Meaning: O1's reasoning enhancements may not fully extend to ophthalmology, underscoring the need for domain-specific refinements to optimize performance in specialized fields like ophthalmology. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.13949v1-abstract-full').style.display = 'none'; document.getElementById('2501.13949v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">44 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.12948">arXiv:2501.12948</a> <span> [<a href="https://arxiv.org/pdf/2501.12948">pdf</a>, <a href="https://arxiv.org/format/2501.12948">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=DeepSeek-AI"> DeepSeek-AI</a>, <a href="/search/?searchtype=author&query=Guo%2C+D">Daya Guo</a>, <a href="/search/?searchtype=author&query=Yang%2C+D">Dejian Yang</a>, <a href="/search/?searchtype=author&query=Zhang%2C+H">Haowei Zhang</a>, <a href="/search/?searchtype=author&query=Song%2C+J">Junxiao Song</a>, <a href="/search/?searchtype=author&query=Zhang%2C+R">Ruoyu Zhang</a>, <a href="/search/?searchtype=author&query=Xu%2C+R">Runxin Xu</a>, <a href="/search/?searchtype=author&query=Zhu%2C+Q">Qihao Zhu</a>, <a href="/search/?searchtype=author&query=Ma%2C+S">Shirong Ma</a>, <a href="/search/?searchtype=author&query=Wang%2C+P">Peiyi Wang</a>, <a href="/search/?searchtype=author&query=Bi%2C+X">Xiao Bi</a>, <a href="/search/?searchtype=author&query=Zhang%2C+X">Xiaokang Zhang</a>, <a href="/search/?searchtype=author&query=Yu%2C+X">Xingkai Yu</a>, <a href="/search/?searchtype=author&query=Wu%2C+Y">Yu Wu</a>, <a href="/search/?searchtype=author&query=Wu%2C+Z+F">Z. F. Wu</a>, <a href="/search/?searchtype=author&query=Gou%2C+Z">Zhibin Gou</a>, <a href="/search/?searchtype=author&query=Shao%2C+Z">Zhihong Shao</a>, <a href="/search/?searchtype=author&query=Li%2C+Z">Zhuoshu Li</a>, <a href="/search/?searchtype=author&query=Gao%2C+Z">Ziyi Gao</a>, <a href="/search/?searchtype=author&query=Liu%2C+A">Aixin Liu</a>, <a href="/search/?searchtype=author&query=Xue%2C+B">Bing Xue</a>, <a href="/search/?searchtype=author&query=Wang%2C+B">Bingxuan Wang</a>, <a href="/search/?searchtype=author&query=Wu%2C+B">Bochao Wu</a>, <a href="/search/?searchtype=author&query=Feng%2C+B">Bei Feng</a>, <a href="/search/?searchtype=author&query=Lu%2C+C">Chengda Lu</a> , et al. (175 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.12948v1-abstract-short" style="display: inline;"> We introduce our first-generation reasoning models, DeepSeek-R1-Zero and DeepSeek-R1. DeepSeek-R1-Zero, a model trained via large-scale reinforcement learning (RL) without supervised fine-tuning (SFT) as a preliminary step, demonstrates remarkable reasoning capabilities. Through RL, DeepSeek-R1-Zero naturally emerges with numerous powerful and intriguing reasoning behaviors. However, it encounters… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.12948v1-abstract-full').style.display = 'inline'; document.getElementById('2501.12948v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.12948v1-abstract-full" style="display: none;"> We introduce our first-generation reasoning models, DeepSeek-R1-Zero and DeepSeek-R1. DeepSeek-R1-Zero, a model trained via large-scale reinforcement learning (RL) without supervised fine-tuning (SFT) as a preliminary step, demonstrates remarkable reasoning capabilities. Through RL, DeepSeek-R1-Zero naturally emerges with numerous powerful and intriguing reasoning behaviors. However, it encounters challenges such as poor readability, and language mixing. To address these issues and further enhance reasoning performance, we introduce DeepSeek-R1, which incorporates multi-stage training and cold-start data before RL. DeepSeek-R1 achieves performance comparable to OpenAI-o1-1217 on reasoning tasks. To support the research community, we open-source DeepSeek-R1-Zero, DeepSeek-R1, and six dense models (1.5B, 7B, 8B, 14B, 32B, 70B) distilled from DeepSeek-R1 based on Qwen and Llama. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.12948v1-abstract-full').style.display = 'none'; document.getElementById('2501.12948v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.11286">arXiv:2501.11286</a> <span> [<a href="https://arxiv.org/pdf/2501.11286">pdf</a>, <a href="https://arxiv.org/format/2501.11286">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> </div> </div> <p class="title is-5 mathjax"> Hybrid Photonic-digital Accelerator for Attention Mechanism </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Li%2C+H">Huize Li</a>, <a href="/search/?searchtype=author&query=Chen%2C+D">Dan Chen</a>, <a href="/search/?searchtype=author&query=Mitra%2C+T">Tulika Mitra</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.11286v1-abstract-short" style="display: inline;"> The wide adoption and substantial computational resource requirements of attention-based Transformers have spurred the demand for efficient hardware accelerators. Unlike digital-based accelerators, there is growing interest in exploring photonics due to its high energy efficiency and ultra-fast processing speeds. However, the significant signal conversion overhead limits the performance of photoni… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.11286v1-abstract-full').style.display = 'inline'; document.getElementById('2501.11286v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.11286v1-abstract-full" style="display: none;"> The wide adoption and substantial computational resource requirements of attention-based Transformers have spurred the demand for efficient hardware accelerators. Unlike digital-based accelerators, there is growing interest in exploring photonics due to its high energy efficiency and ultra-fast processing speeds. However, the significant signal conversion overhead limits the performance of photonic-based accelerators. In this work, we propose HyAtten, a photonic-based attention accelerator with minimize signal conversion overhead. HyAtten incorporates a signal comparator to classify signals into two categories based on whether they can be processed by low-resolution converters. HyAtten integrates low-resolution converters to process all low-resolution signals, thereby boosting the parallelism of photonic computing. For signals requiring high-resolution conversion, HyAtten uses digital circuits instead of signal converters to reduce area and latency overhead. Compared to state-of-the-art photonic-based Transformer accelerator, HyAtten achieves 9.8X performance/area and 2.2X energy-efficiency/area improvement. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.11286v1-abstract-full').style.display = 'none'; document.getElementById('2501.11286v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">7 pages, 8 figures, to be published in DATE 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.11041">arXiv:2501.11041</a> <span> [<a href="https://arxiv.org/pdf/2501.11041">pdf</a>, <a href="https://arxiv.org/format/2501.11041">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Enhancing Semantic Consistency of Large Language Models through Model Editing: An Interpretability-Oriented Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Yang%2C+J">Jingyuan Yang</a>, <a href="/search/?searchtype=author&query=Chen%2C+D">Dapeng Chen</a>, <a href="/search/?searchtype=author&query=Sun%2C+Y">Yajing Sun</a>, <a href="/search/?searchtype=author&query=Li%2C+R">Rongjun Li</a>, <a href="/search/?searchtype=author&query=Feng%2C+Z">Zhiyong Feng</a>, <a href="/search/?searchtype=author&query=Peng%2C+W">Wei Peng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.11041v1-abstract-short" style="display: inline;"> A Large Language Model (LLM) tends to generate inconsistent and sometimes contradictory outputs when presented with a prompt that has equivalent semantics but is expressed differently from the original prompt. To achieve semantic consistency of an LLM, one of the key approaches is to finetune the model with prompt-output pairs with semantically equivalent meanings. Despite its effectiveness, a dat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.11041v1-abstract-full').style.display = 'inline'; document.getElementById('2501.11041v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.11041v1-abstract-full" style="display: none;"> A Large Language Model (LLM) tends to generate inconsistent and sometimes contradictory outputs when presented with a prompt that has equivalent semantics but is expressed differently from the original prompt. To achieve semantic consistency of an LLM, one of the key approaches is to finetune the model with prompt-output pairs with semantically equivalent meanings. Despite its effectiveness, a data-driven finetuning method incurs substantial computation costs in data preparation and model optimization. In this regime, an LLM is treated as a ``black box'', restricting our ability to gain deeper insights into its internal mechanism. In this paper, we are motivated to enhance the semantic consistency of LLMs through a more interpretable method (i.e., model editing) to this end. We first identify the model components (i.e., attention heads) that have a key impact on the semantic consistency of an LLM. We subsequently inject biases into the output of these model components along the semantic-consistency activation direction. It is noteworthy that these modifications are cost-effective, without reliance on mass manipulations of the original model parameters. Through comprehensive experiments on the constructed NLU and open-source NLG datasets, our method demonstrates significant improvements in the semantic consistency and task performance of LLMs. Additionally, our method exhibits promising generalization capabilities by performing well on tasks beyond the primary tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.11041v1-abstract-full').style.display = 'none'; document.getElementById('2501.11041v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Chen%2C+D&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Chen%2C+D&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Chen%2C+D&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Chen%2C+D&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Chen%2C+D&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Chen%2C+D&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">…</span></li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>