Search | arXiv e-print repository
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 1,428 results for author: <span class="mathjax">Chen, K</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Chen%2C+K">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Chen, K"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Chen%2C+K&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Chen, K"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Chen%2C+K&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Chen%2C+K&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Chen%2C+K&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Chen%2C+K&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Chen%2C+K&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Chen%2C+K&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">…</span></li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09654">arXiv:2502.09654</a> <span> [<a href="https://arxiv.org/pdf/2502.09654">pdf</a>, <a href="https://arxiv.org/format/2502.09654">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Heterogeneous Mixture of Experts for Remote Sensing Image Super-Resolution </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+B">Bowen Chen</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Keyan Chen</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+M">Mohan Yang</a>, <a href="/search/cs?searchtype=author&query=Zou%2C+Z">Zhengxia Zou</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+Z">Zhenwei Shi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09654v1-abstract-short" style="display: inline;"> Remote sensing image super-resolution (SR) aims to reconstruct high-resolution remote sensing images from low-resolution inputs, thereby addressing limitations imposed by sensors and imaging conditions. However, the inherent characteristics of remote sensing images, including diverse ground object types and complex details, pose significant challenges to achieving high-quality reconstruction. 
   Abstract: Remote sensing image super-resolution (SR) aims to reconstruct high-resolution remote sensing images from low-resolution inputs, thereby addressing limitations imposed by sensors and imaging conditions. However, the inherent characteristics of remote sensing images, including diverse ground object types and complex details, pose significant challenges to achieving high-quality reconstruction. Existing methods typically employ a uniform structure to process various types of ground objects without distinction, making it difficult to adapt to the complex characteristics of remote sensing images. To address this issue, we introduce a Mixture of Experts (MoE) model and design a set of heterogeneous experts. These experts are organized into multiple expert groups, where experts within each group are homogeneous while being heterogeneous across groups. This design ensures that specialized activation parameters can be employed to handle the diverse and intricate details of ground objects effectively. To better accommodate the heterogeneous experts, we propose a multi-level feature aggregation strategy to guide the routing process. Additionally, we develop a dual-routing mechanism to adaptively select the optimal expert for each pixel. Experiments conducted on the UCMerced and AID datasets demonstrate that our proposed method achieves superior SR reconstruction accuracy compared to state-of-the-art methods. The code will be available at https://github.com/Mr-Bamboo/MFG-HMoE.
   Submitted 11 February, 2025; originally announced February 2025.
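The grouped, per-pixel routing this abstract describes is easy to picture in code. Below is a minimal sketch, not the authors' MFG-HMoE: the group count, expert shapes, and the soft two-stage weighting are illustrative assumptions standing in for the paper's multi-level feature aggregation and dual routing.

```python
import torch
import torch.nn as nn

class TwoStageMoE(nn.Module):
    """Toy per-pixel dual routing: route each pixel to a group, then to an expert."""
    def __init__(self, ch=32, n_groups=3, experts_per_group=4):
        super().__init__()
        self.group_router = nn.Conv2d(ch, n_groups, 1)           # group logits per pixel
        self.expert_router = nn.Conv2d(ch, n_groups * experts_per_group, 1)
        # Experts within a group are homogeneous here; across groups they could
        # differ in structure (kernel size, depth, ...), as the paper proposes.
        self.experts = nn.ModuleList(
            [nn.Conv2d(ch, ch, 3, padding=1) for _ in range(n_groups * experts_per_group)]
        )
        self.n_groups, self.epg = n_groups, experts_per_group

    def forward(self, x):
        g = self.group_router(x).softmax(1)                       # (B, G, H, W)
        e = self.expert_router(x)
        e = e.view(x.size(0), self.n_groups, self.epg, *x.shape[2:]).softmax(2)
        out = torch.zeros_like(x)
        for gi in range(self.n_groups):
            for ei in range(self.epg):
                w = (g[:, gi] * e[:, gi, ei]).unsqueeze(1)        # per-pixel weight
                out = out + w * self.experts[gi * self.epg + ei](x)
        return out

y = TwoStageMoE()(torch.randn(1, 32, 16, 16))
print(y.shape)  # torch.Size([1, 32, 16, 16])
```

The sketch weights all experts softly; a hard per-pixel argmax over the combined weights would match the "select the optimal expert for each pixel" phrasing more literally at the cost of differentiability.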
2. arXiv:2502.08299 [pdf, other] (cs.CV)
   When do they StOP?: A First Step Towards Automatically Identifying Team Communication in the Operating Room
   Authors: Keqi Chen, Lilien Schewski, Vinkle Srivastav, Joël Lavanchy, Didier Mutter, Guido Beldi, Sandra Keller, Nicolas Padoy
   Abstract: Purpose: Surgical performance depends not only on surgeons' technical skills but also on team communication within and across the different professional groups present during the operation. Therefore, automatically identifying team communication in the OR is crucial for patient safety and advances in the development of computer-assisted surgical workflow analysis and intra-operative support systems. To take the first step, we propose a new task of detecting communication briefings involving all OR team members, i.e., the team Time-out and the StOP?-protocol, by localizing their start and end times in video recordings of surgical operations. Methods: We generate an OR dataset of real surgeries, called Team-OR, with more than one hundred hours of surgical videos captured by the multi-view camera system in the OR. The dataset contains temporal annotations of 33 Time-out and 22 StOP?-protocol activities in total. We then propose a novel group activity detection approach, where we encode both scene context and action features, and use an efficient neural network model to output the results. Results: The experimental results on the Team-OR dataset show that our approach outperforms existing state-of-the-art temporal action detection approaches. It also demonstrates the lack of research on group activities in the OR, proving the significance of our dataset. Conclusion: We investigate the team Time-out and the StOP?-protocol in the OR, by presenting the first OR dataset with temporal annotations of group activity protocols, and introducing a novel group activity detection approach that outperforms existing approaches. Code is available at https://github.com/CAMMA-public/Team-OR.
   Submitted 13 February, 2025; v1 submitted 12 February, 2025; originally announced February 2025.
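The task here is temporal localization: turning a video into (start, end) times for a briefing. For orientation only, the sketch below shows the generic post-processing step such detectors share; the per-frame scores and thresholds are hypothetical, and the paper's actual model encodes scene context and action features upstream of anything like this.

```python
def intervals_from_scores(scores, fps=1.0, thresh=0.5, min_len=3):
    """Turn per-frame activity probabilities into (start_s, end_s) intervals.

    A generic baseline for temporal activity localization, not the Team-OR
    model: threshold the frame scores, merge consecutive positive frames
    into intervals, and drop runs shorter than min_len frames.
    """
    intervals, start = [], None
    for i, s in enumerate(scores):
        if s >= thresh and start is None:
            start = i
        elif s < thresh and start is not None:
            if i - start >= min_len:
                intervals.append((start / fps, i / fps))
            start = None
    if start is not None and len(scores) - start >= min_len:
        intervals.append((start / fps, len(scores) / fps))
    return intervals

print(intervals_from_scores([0.1, 0.7, 0.9, 0.8, 0.2, 0.6, 0.9, 0.9, 0.9]))
# [(1.0, 4.0), (5.0, 9.0)]
```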
3. arXiv:2502.08211 [pdf, other] (cs.LG, cs.AI)
   Quality over Quantity: Boosting Data Efficiency Through Ensembled Multimodal Data Curation
   Authors: Jinda Xu, Yuhao Song, Daming Wang, Weiwei Zhao, Minghua Chen, Kangliang Chen, Qinya Li
   Abstract: In an era overwhelmed by vast amounts of data, the effective curation of web-crawl datasets is essential for optimizing model performance. This paper tackles the challenges associated with the unstructured and heterogeneous nature of such datasets. Traditional heuristic curation methods often inadequately capture complex features, resulting in biases and the exclusion of relevant data. We introduce an advanced, learning-driven approach, Ensemble Curation Of DAta ThroUgh Multimodal Operators (EcoDatum), incorporating a novel quality-guided deduplication method to ensure balanced feature distributions. EcoDatum strategically integrates various unimodal and multimodal data curation operators within a weak supervision ensemble framework, utilizing automated optimization to score each data point effectively. EcoDatum significantly improves data curation quality and efficiency, outperforming existing state-of-the-art (SOTA) techniques: it ranked 1st on the DataComp leaderboard, with an average performance score of 0.182 across 38 diverse evaluation datasets. This represents a 28% improvement over the DataComp baseline method, demonstrating its effectiveness in improving dataset curation and model training efficiency.
   Submitted 12 February, 2025; originally announced February 2025.
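The core mechanic, several curation operators voting on each data point inside a weighted ensemble, fits in a few lines. The operators and weights below are invented stand-ins; the paper learns its ensemble with weak supervision and uses far richer operators.

```python
import numpy as np

# Hypothetical curation operators, each mapping a sample to a quality vote in
# [0, 1]; EcoDatum's actual unimodal/multimodal operators differ.
operators = [
    lambda s: s["clip_sim"],                              # image-text alignment proxy
    lambda s: min(len(s["caption"].split()) / 20, 1.0),   # caption informativeness proxy
    lambda s: 1.0 - s["dup_score"],                       # deduplication proxy
]
weights = np.array([0.5, 0.2, 0.3])   # fixed here; learned via weak supervision in the paper

def curate(samples, keep_frac=0.5):
    """Score every sample with the weighted operator ensemble, keep the top fraction."""
    scores = np.array([[op(s) for op in operators] for s in samples]) @ weights
    cutoff = np.quantile(scores, 1.0 - keep_frac)
    return [s for s, sc in zip(samples, scores) if sc >= cutoff]

pool = [
    {"caption": "a red fox jumps over a creek", "clip_sim": 0.8, "dup_score": 0.1},
    {"caption": "image", "clip_sim": 0.2, "dup_score": 0.9},
]
print(len(curate(pool, keep_frac=0.5)))  # 1
```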
4. arXiv:2502.07814 [pdf, other] (cs.LG, cs.AI, physics.ao-ph)
   Satellite Observations Guided Diffusion Model for Accurate Meteorological States at Arbitrary Resolution
   Authors: Siwei Tu, Ben Fei, Weidong Yang, Fenghua Ling, Hao Chen, Zili Liu, Kun Chen, Hang Fan, Wanli Ouyang, Lei Bai
   Abstract: Accurate acquisition of surface meteorological conditions at arbitrary locations holds significant importance for weather forecasting and climate simulation. Because meteorological states derived from satellite observations are often provided as low-resolution grid fields, directly applying spatial interpolation to obtain meteorological states for specific locations often results in significant discrepancies when compared to actual observations. Existing downscaling methods for acquiring meteorological state information at higher resolutions commonly overlook the correlation with satellite observations. To bridge the gap, we propose the Satellite-observations Guided Diffusion Model (SGD), a conditional diffusion model pre-trained on ERA5 reanalysis data with satellite observations (GridSat) as conditions, which is employed for sampling downscaled meteorological states through a zero-shot guided sampling strategy and patch-based methods. During training, we propose to fuse the information from GridSat satellite observations into ERA5 maps via the attention mechanism, enabling SGD to generate atmospheric states that align more accurately with actual conditions. During sampling, we employ optimizable convolutional kernels to simulate the upscale process, thereby generating high-resolution ERA5 maps using low-resolution ERA5 maps as well as observations from weather stations as guidance. Moreover, our devised patch-based method promotes SGD to generate meteorological states at arbitrary resolutions. Experiments demonstrate that SGD achieves accurate downscaling of meteorological states to 6.25 km.
   Submitted 8 February, 2025; originally announced February 2025.
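Zero-shot guided sampling generally means steering each denoising step with the gradient of a data-fidelity term. The sketch below shows that generic mechanism, not SGD's exact scheme: the DDPM-style update is simplified to returning a guided clean-field estimate, and `downsample` is a fixed stand-in for the paper's optimizable convolutional kernels.

```python
import torch

def guided_denoise_step(x_t, t, eps_model, downsample, y_obs, alpha_bar, guide_scale=1.0):
    """One denoising step with observation guidance (a generic sketch, not SGD).

    x_t:        current noisy high-resolution field, shape (B, C, H, W)
    eps_model:  noise predictor eps_model(x_t, t)
    downsample: coarsening operator mapping high-res fields to the observation grid
    y_obs:      low-resolution observations the sample should stay consistent with
    """
    x_t = x_t.detach().requires_grad_(True)
    eps = eps_model(x_t, t)
    # Predicted clean field from the current noisy sample (standard DDPM identity).
    x0_hat = (x_t - torch.sqrt(1 - alpha_bar[t]) * eps) / torch.sqrt(alpha_bar[t])
    # Data fidelity: the denoised field, once coarsened, should match the observations.
    loss = ((downsample(x0_hat) - y_obs) ** 2).mean()
    grad = torch.autograd.grad(loss, x_t)[0]
    return (x0_hat - guide_scale * grad).detach()   # guided estimate for the next iterate

ab = torch.linspace(0.99, 0.1, 10)                  # toy noise schedule
x = guided_denoise_step(torch.randn(1, 1, 8, 8), 5,
                        lambda x, t: torch.zeros_like(x),   # dummy noise predictor
                        lambda x: x[..., ::2, ::2],         # toy 2x coarsening
                        torch.zeros(1, 1, 4, 4), ab)
print(x.shape)  # torch.Size([1, 1, 8, 8])
```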
5. arXiv:2502.07373 [pdf, other] (cs.LG, cs.CL, cs.MA, cs.NE)
   EvoFlow: Evolving Diverse Agentic Workflows On The Fly
   Authors: Guibin Zhang, Kaijie Chen, Guancheng Wan, Heng Chang, Hong Cheng, Kun Wang, Shuyue Hu, Lei Bai
   Abstract: The past two years have witnessed the evolution of large language model (LLM)-based multi-agent systems from labor-intensive manual design to partial automation (e.g., prompt engineering, communication topology) and eventually to fully automated design. However, existing agentic automation pipelines often lack LLM heterogeneity and focus on single-objective performance optimization, limiting their potential to combine weaker models for more customized and cost-effective solutions. To address this challenge, we propose EvoFlow, a niching evolutionary algorithm-based framework to automatically search a population of heterogeneous and complexity-adaptive agentic workflows, rather than a single homogeneous, complex workflow. Technically, EvoFlow performs (1) tag-based retrieval to extract parent workflows from an agentic population, evolves new workflows through (2) crossover and (3) mutation, and employs (4) niching-based selection to maintain population diversity and quality. Extensive evaluations across seven benchmarks demonstrate that EvoFlow is: (I) diverse, evolving a population of workflows ranging from simple I/O tasks to complex multi-turn interactions; (II) high-performing, outperforming previous handcrafted and automated workflows by 1.23% to 29.86%; (III) economical, surpassing powerful o1-preview at 12.4% of its inference cost using weaker open-source models.
   Submitted 11 February, 2025; originally announced February 2025.
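The four numbered steps map onto a standard niching-GA skeleton. The toy below is illustrative only: "workflows" are tagged operator lists, fitness is synthetic, and niches are keyed on the first operator, none of which matches the paper's actual representation.

```python
import random

OPS = ["plan", "code", "critic", "retrieve", "vote"]

def fitness(wf):                      # synthetic: favor varied, short workflows
    return len(set(wf["ops"])) - 0.1 * len(wf["ops"])

def retrieve_parents(pop, tag):       # (1) tag-based retrieval
    cands = [w for w in pop if tag in w["tags"]] or pop
    return random.sample(cands, 2)

def crossover(a, b):                  # (2) crossover: splice operator sequences
    cut = random.randrange(1, len(a["ops"]))
    return {"ops": a["ops"][:cut] + b["ops"][cut:], "tags": a["tags"] | b["tags"]}

def mutate(wf):                       # (3) mutation: swap in a random operator
    ops = wf["ops"][:]
    ops[random.randrange(len(ops))] = random.choice(OPS)
    return {"ops": ops, "tags": wf["tags"]}

def niche_select(pop, size):          # (4) niching: keep the best per niche first
    best = {}
    for wf in sorted(pop, key=fitness, reverse=True):
        best.setdefault(wf["ops"][0], wf)
    survivors = list(best.values())
    return survivors + sorted(pop, key=fitness, reverse=True)[: size - len(survivors)]

pop = [{"ops": random.choices(OPS, k=3), "tags": {"math"}} for _ in range(6)]
for _ in range(10):
    child = mutate(crossover(*retrieve_parents(pop, "math")))
    pop = niche_select(pop + [child], size=6)
print(max(pop, key=fitness))
```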
6. arXiv:2502.07172 [pdf, ps, other] (cs.CV, cs.AI)
   SemiHMER: Semi-supervised Handwritten Mathematical Expression Recognition using pseudo-labels
   Authors: Kehua Chen, Haoyang Shen
   Abstract: In recent years, deep learning with Convolutional Neural Networks (CNNs) has achieved remarkable results in the field of HMER (Handwritten Mathematical Expression Recognition). However, it remains challenging to improve performance with limited labeled training data. This paper presents, for the first time, a simple yet effective semi-supervised HMER framework by introducing dual-branch semi-supervised learning. Specifically, we simplify the conventional deep co-training from consistency regularization to cross-supervised learning, where the prediction of one branch is used as a pseudo-label to supervise the other branch directly end-to-end. Considering that the learning of the two branches tends to converge in the later stages of model optimization, we also incorporate a weak-to-strong strategy by applying different levels of augmentation to each branch, which behaves like expanding the training data and improving the quality of network training. Meanwhile, we propose a novel module, the Global Dynamic Counting Module (GDCM), to enhance the performance of the HMER decoder, which alleviates recognition inaccuracies in long-distance formula recognition and the occurrence of repeated characters. We release our code at https://github.com/chenkehua/SemiHMER.
   Submitted 10 February, 2025; originally announced February 2025.
   Comments: 12 pages, 3 figures
   Journal ref: Under review at ICDAR2024
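Cross-supervised learning with a weak-to-strong split reduces to a short training-step recipe. The sketch below uses a classification stand-in with hypothetical branches; the real SemiHMER branches decode LaTeX sequences, which this deliberately simplifies away.

```python
import torch
import torch.nn.functional as F

def cross_supervision_loss(branch_a, branch_b, weak_x, strong_x):
    """Cross pseudo-supervision on one unlabeled batch (classification stand-in).

    Each branch sees a different augmentation strength and is supervised by
    the other branch's hard pseudo-label, end to end.
    """
    logits_a = branch_a(weak_x)               # weakly augmented view -> branch A
    logits_b = branch_b(strong_x)             # strongly augmented view -> branch B
    pseudo_a = logits_a.argmax(1).detach()    # stop-gradient pseudo-labels
    pseudo_b = logits_b.argmax(1).detach()
    return F.cross_entropy(logits_a, pseudo_b) + F.cross_entropy(logits_b, pseudo_a)

a, b = torch.nn.Linear(16, 10), torch.nn.Linear(16, 10)
x = torch.randn(4, 16)
loss = cross_supervision_loss(a, b,
                              x + 0.01 * torch.randn_like(x),   # weak augmentation
                              x + 0.30 * torch.randn_like(x))   # strong augmentation
loss.backward()
```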
inline;"> Reasoning abilities, especially those for solving complex math problems, are crucial components of general intelligence. Recent advances by proprietary companies, such as o-series models of OpenAI, have made remarkable progress on reasoning tasks. However, the complete technical details remain unrevealed, and the techniques that are believed certainly to be adopted are only reinforcement learning… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06781v1-abstract-full').style.display = 'inline'; document.getElementById('2502.06781v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.06781v1-abstract-full" style="display: none;"> Reasoning abilities, especially those for solving complex math problems, are crucial components of general intelligence. Recent advances by proprietary companies, such as o-series models of OpenAI, have made remarkable progress on reasoning tasks. However, the complete technical details remain unrevealed, and the techniques that are believed certainly to be adopted are only reinforcement learning (RL) and the long chain of thoughts. This paper proposes a new RL framework, termed OREAL, to pursue the performance limit that can be achieved through \textbf{O}utcome \textbf{RE}w\textbf{A}rd-based reinforcement \textbf{L}earning for mathematical reasoning tasks, where only binary outcome rewards are easily accessible. We theoretically prove that behavior cloning on positive trajectories from best-of-N (BoN) sampling is sufficient to learn the KL-regularized optimal policy in binary feedback environments. This formulation further implies that the rewards of negative samples should be reshaped to ensure the gradient consistency between positive and negative samples. To alleviate the long-existing difficulties brought by sparse rewards in RL, which are even exacerbated by the partial correctness of the long chain of thought for reasoning tasks, we further apply a token-level reward model to sample important tokens in reasoning trajectories for learning. With OREAL, for the first time, a 7B model can obtain 94.0 pass@1 accuracy on MATH-500 through RL, being on par with 32B models. OREAL-32B also surpasses previous 32B models trained by distillation with 95.0 pass@1 accuracy on MATH-500. Our investigation also indicates the importance of initial policy models and training queries for RL. Code, models, and data will be released to benefit future research\footnote{https://github.com/InternLM/OREAL}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06781v1-abstract-full').style.display = 'none'; document.getElementById('2502.06781v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">We released our code, data, and model on https://github.com/InternLM/OREAL</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.06424">arXiv:2502.06424</a> <span> [<a href="https://arxiv.org/pdf/2502.06424">pdf</a>, <a href="https://arxiv.org/format/2502.06424">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> CS-SHAP: Extending SHAP to Cyclic-Spectral Domain for Better Interpretability of Intelligent Fault Diagnosis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+Q">Qian Chen</a>, <a href="/search/cs?searchtype=author&query=Dong%2C+X">Xingjian Dong</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+K">Kui Hu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Kangkang Chen</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+Z">Zhike Peng</a>, <a href="/search/cs?searchtype=author&query=Meng%2C+G">Guang Meng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.06424v1-abstract-short" style="display: inline;"> Neural networks (NNs), with their powerful nonlinear mapping and end-to-end capabilities, are widely applied in mechanical intelligent fault diagnosis (IFD). However, as typical black-box models, they pose challenges in understanding their decision basis and logic, limiting their deployment in high-reliability scenarios. Hence, various methods have been proposed to enhance the interpretability of… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06424v1-abstract-full').style.display = 'inline'; document.getElementById('2502.06424v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.06424v1-abstract-full" style="display: none;"> Neural networks (NNs), with their powerful nonlinear mapping and end-to-end capabilities, are widely applied in mechanical intelligent fault diagnosis (IFD). However, as typical black-box models, they pose challenges in understanding their decision basis and logic, limiting their deployment in high-reliability scenarios. Hence, various methods have been proposed to enhance the interpretability of IFD. Among these, post-hoc approaches can provide explanations without changing model architecture, preserving its flexibility and scalability. However, existing post-hoc methods often suffer from limitations in explanation forms. They either require preprocessing that disrupts the end-to-end nature or overlook fault mechanisms, leading to suboptimal explanations. To address these issues, we derived the cyclic-spectral (CS) transform and proposed the CS-SHAP by extending Shapley additive explanations (SHAP) to the CS domain. CS-SHAP can evaluate contributions from both carrier and modulation frequencies, aligning more closely with fault mechanisms and delivering clearer and more accurate explanations. 
8. arXiv:2502.06424 [pdf, other] (cs.LG, cs.AI)
   CS-SHAP: Extending SHAP to Cyclic-Spectral Domain for Better Interpretability of Intelligent Fault Diagnosis
   Authors: Qian Chen, Xingjian Dong, Kui Hu, Kangkang Chen, Zhike Peng, Guang Meng
   Abstract: Neural networks (NNs), with their powerful nonlinear mapping and end-to-end capabilities, are widely applied in mechanical intelligent fault diagnosis (IFD). However, as typical black-box models, they pose challenges in understanding their decision basis and logic, limiting their deployment in high-reliability scenarios. Hence, various methods have been proposed to enhance the interpretability of IFD. Among these, post-hoc approaches can provide explanations without changing model architecture, preserving its flexibility and scalability. However, existing post-hoc methods often suffer from limitations in explanation forms: they either require preprocessing that disrupts the end-to-end nature or overlook fault mechanisms, leading to suboptimal explanations. To address these issues, we derive the cyclic-spectral (CS) transform and propose CS-SHAP by extending Shapley additive explanations (SHAP) to the CS domain. CS-SHAP can evaluate contributions from both carrier and modulation frequencies, aligning more closely with fault mechanisms and delivering clearer and more accurate explanations. Three datasets are utilized to validate the superior interpretability of CS-SHAP, ensuring its correctness, reproducibility, and practical performance. With open-source code and outstanding interpretability, CS-SHAP has the potential to be widely adopted and become the post-hoc interpretability benchmark in IFD, even in other classification tasks. The code is available at https://github.com/ChenQian0618/CS-SHAP.
   Submitted 10 February, 2025; originally announced February 2025.
   Comments: 21 pages, 21 figures
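Attributing a classifier over a transformed domain rather than the raw signal is the key move here. The sketch below estimates permutation-Shapley values over plain FFT magnitude bins, a much-simplified stand-in for the cyclic-spectral transform, with a toy "diagnoser" whose decision depends on one frequency band.

```python
import numpy as np

rng = np.random.default_rng(0)

def shapley_over_spectrum(model, x, n_perm=200):
    """Monte-Carlo Shapley attribution over frequency bins.

    "Removing" a bin means zeroing it before the model sees the spectrum;
    bins are added back in random order and marginal gains are averaged.
    """
    spec = np.abs(np.fft.rfft(x))
    k = len(spec)
    phi = np.zeros(k)
    for _ in range(n_perm):
        order = rng.permutation(k)
        masked = np.zeros(k)
        prev = model(masked)
        for j in order:                 # add bins one by one in random order
            masked[j] = spec[j]
            cur = model(masked)
            phi[j] += cur - prev
            prev = cur
    return phi / n_perm

# Toy diagnoser: fault score driven by energy around bin 5 (e.g. a modulation line).
model = lambda s: float(s[4:7].sum())
t = np.linspace(0, 1, 64, endpoint=False)
x = np.sin(2 * np.pi * 5 * t)
print(np.argmax(shapley_over_spectrum(model, x)))  # 5
```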
9. arXiv:2502.05557 [pdf, other] (cs.CV)
   MMHMER: Multi-viewer and Multi-task for Handwritten Mathematical Expression Recognition
   Authors: Kehua Chen, Haoyang Shen, Lifan Zhong, Mingyi Chen
   Abstract: Handwritten Mathematical Expression Recognition (HMER) methods have made remarkable progress, with most existing HMER approaches based on either a hybrid CNN/RNN architecture with GRU or a Transformer architecture. Each of these has its strengths and weaknesses. Leveraging different model structures as viewers and effectively integrating their diverse capabilities presents an intriguing avenue for exploration. This involves addressing two key challenges: 1) how to fuse these two methods effectively, and 2) how to achieve higher performance under an appropriate level of complexity. This paper proposes an efficient CNN-Transformer multi-viewer, multi-task approach to enhance the model's recognition performance. Our MMHMER model achieves 63.96%, 62.51%, and 65.46% ExpRate on CROHME14, CROHME16, and CROHME19, outperforming Posformer with absolute gains of 1.28%, 1.48%, and 0.58%, respectively. The main contribution of our approach is a new multi-view, multi-task framework that can effectively integrate the strengths of CNN and Transformer. By leveraging the feature extraction capabilities of CNN and the sequence modeling capabilities of Transformer, our model can better handle the complexity of handwritten mathematical expressions.
   Submitted 8 February, 2025; originally announced February 2025.
   Comments: 7 pages, 2 figures
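At its simplest, fusing a CNN viewer and a Transformer viewer can mean averaging their output logits. The toy below does exactly that and is only a stand-in for the paper's multi-viewer, multi-task fusion; the architectures and sizes are arbitrary.

```python
import torch
import torch.nn as nn

class TwoViewerFusion(nn.Module):
    """Toy two-viewer ensemble: average symbol logits from a CNN-style branch
    and a Transformer-style branch over a 1-D feature sequence."""
    def __init__(self, vocab=100, d=32):
        super().__init__()
        self.cnn = nn.Sequential(nn.Conv1d(1, d, 5, padding=2), nn.ReLU(),
                                 nn.AdaptiveAvgPool1d(1), nn.Flatten(),
                                 nn.Linear(d, vocab))
        enc = nn.TransformerEncoderLayer(d_model=d, nhead=4, batch_first=True)
        self.proj = nn.Linear(1, d)
        self.trf = nn.TransformerEncoder(enc, 1)
        self.head = nn.Linear(d, vocab)

    def forward(self, x):                                  # x: (B, T) feature sequence
        c = self.cnn(x.unsqueeze(1))                       # CNN viewer -> (B, vocab)
        t = self.head(self.trf(self.proj(x.unsqueeze(-1))).mean(1))
        return (c + t) / 2                                 # fused viewer logits

print(TwoViewerFusion()(torch.randn(2, 50)).shape)         # torch.Size([2, 100])
```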
10. arXiv:2502.04116 [pdf, other] (cs.LG, cs.CV)
    Generative Adversarial Networks Bridging Art and Machine Intelligence
    Authors: Junhao Song, Yichao Zhang, Ziqian Bi, Tianyang Wang, Keyu Chen, Ming Li, Qian Niu, Junyu Liu, Benji Peng, Sen Zhang, Ming Liu, Jiawei Xu, Xuanhe Pan, Jinlang Wang, Pohsun Feng, Yizhu Wen, Lawrence K. Q. Yan, Hong-Ming Tseng, Xinyuan Song, Jintao Ren, Silin Chen, Yunze Wang, Weiche Hsieh, Bowen Jing, Junjie Yang, et al. (3 additional authors not shown)
    Abstract: Generative Adversarial Networks (GANs) have greatly influenced the development of computer vision and artificial intelligence in the past decade and also connected art and machine intelligence together. This book begins with a detailed introduction to the fundamental principles and historical development of GANs, contrasting them with traditional generative models and elucidating the core adversarial mechanisms through illustrative Python examples. The text systematically addresses the mathematical and theoretical underpinnings, including probability theory, statistics, and game theory, providing a solid framework for understanding the objectives, loss functions, and optimisation challenges inherent to GAN training. Subsequent chapters review classic variants such as Conditional GANs, DCGANs, InfoGAN, and LAPGAN before progressing to advanced training methodologies like Wasserstein GANs, GANs with gradient penalty, least squares GANs, and spectral normalisation techniques. The book further examines architectural enhancements and task-specific adaptations in generators and discriminators, showcasing practical implementations in high resolution image generation, artistic style transfer, video synthesis, text to image generation and other multimedia applications. The concluding sections offer insights into emerging research trends, including self-attention mechanisms, transformer-based generative models, and a comparative analysis with diffusion models, thus charting promising directions for future developments in both academic and applied settings.
    Submitted 9 February, 2025; v1 submitted 6 February, 2025; originally announced February 2025.
Bioinformatics tools and methods offer powerful means for predicting gene functions, protein interactions, and regulatory networks, but these predictions must be validated through experimental… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03478v1-abstract-full').style.display = 'inline'; document.getElementById('2502.03478v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.03478v1-abstract-full" style="display: none;"> The integration of bioinformatics predictions and experimental validation plays a pivotal role in advancing biological research, from understanding molecular mechanisms to developing therapeutic strategies. Bioinformatics tools and methods offer powerful means for predicting gene functions, protein interactions, and regulatory networks, but these predictions must be validated through experimental approaches to ensure their biological relevance. This review explores the various methods and technologies used for experimental validation, including gene expression analysis, protein-protein interaction verification, and pathway validation. We also discuss the challenges involved in translating computational predictions to experimental settings and highlight the importance of collaboration between bioinformatics and experimental research. Finally, emerging technologies, such as CRISPR gene editing, next-generation sequencing, and artificial intelligence, are shaping the future of bioinformatics validation and driving more accurate and efficient biological discoveries. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03478v1-abstract-full').style.display = 'none'; document.getElementById('2502.03478v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.02682">arXiv:2502.02682</a> <span> [<a href="https://arxiv.org/pdf/2502.02682">pdf</a>, <a href="https://arxiv.org/format/2502.02682">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Physics">physics.comp-ph</span> </div> </div> <p class="title is-5 mathjax"> Pseudo-Physics-Informed Neural Operators: Enhancing Operator Learning from Limited Data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+K">Keyan Chen</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yile Li</a>, <a href="/search/cs?searchtype=author&query=Long%2C+D">Da Long</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Z">Zhitong Xu</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+W">Wei Xing</a>, <a href="/search/cs?searchtype=author&query=Hochhalter%2C+J">Jacob Hochhalter</a>, <a href="/search/cs?searchtype=author&query=Zhe%2C+S">Shandian Zhe</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.02682v1-abstract-short" style="display: inline;"> Neural operators have shown great potential in surrogate modeling. However, training a well-performing neural operator typically requires a substantial amount of data, which can pose a major challenge in complex applications. In such scenarios, detailed physical knowledge can be unavailable or difficult to obtain, and collecting extensive data is often prohibitively expensive. To mitigate this cha… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02682v1-abstract-full').style.display = 'inline'; document.getElementById('2502.02682v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.02682v1-abstract-full" style="display: none;"> Neural operators have shown great potential in surrogate modeling. However, training a well-performing neural operator typically requires a substantial amount of data, which can pose a major challenge in complex applications. In such scenarios, detailed physical knowledge can be unavailable or difficult to obtain, and collecting extensive data is often prohibitively expensive. To mitigate this challenge, we propose the Pseudo Physics-Informed Neural Operator (PPI-NO) framework. PPI-NO constructs a surrogate physics system for the target system using partial differential equations (PDEs) derived from simple, rudimentary physics principles, such as basic differential operators. This surrogate system is coupled with a neural operator model, using an alternating update and learning process to iteratively enhance the model's predictive power. 
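<p class="is-size-7">The alternating update just described can be conveyed with a toy 1D sketch: a least-squares fit of "pseudo-physics" coefficients over a library of basic differential operators alternates with retraining the operator under a residual penalty. This is only an assumption-laden illustration of the alternating structure, not the paper's algorithm; the toy data, operator library, and penalty weight are placeholders.</p> <pre><code>
import torch

# Toy 1D setting: learn an operator f -> u on a grid, with a learned
# pseudo-PDE  c0*u + c1*u_x + c2*u_xx ~ f  used as an extra penalty.
n, h = 64, 1.0 / 64
net = torch.nn.Sequential(torch.nn.Linear(n, 128), torch.nn.Tanh(), torch.nn.Linear(128, n))
opt = torch.optim.Adam(net.parameters(), lr=1e-3)

def dx(u):  # first derivative via periodic central differences
    return (u.roll(-1, -1) - u.roll(1, -1)) / (2 * h)

def features(u):  # library of basic differential operators, shape (..., n, 3)
    return torch.stack([u, dx(u), dx(dx(u))], dim=-1)

f_train = torch.randn(32, n)                    # placeholder data; use real (f, u) pairs
u_train = torch.cumsum(f_train, dim=-1) * h

for rnd in range(5):
    # Step 1: least-squares fit of pseudo-physics coefficients c.
    with torch.no_grad():
        Phi = features(net(f_train)).reshape(-1, 3)
        c = torch.linalg.lstsq(Phi, f_train.reshape(-1, 1)).solution
    # Step 2: retrain the operator with a pseudo-PDE residual penalty.
    for _ in range(200):
        u_hat = net(f_train)
        resid = features(u_hat) @ c - f_train.unsqueeze(-1)
        loss = (u_hat - u_train).pow(2).mean() + 0.1 * resid.pow(2).mean()
        opt.zero_grad()
        loss.backward()
        opt.step()
</code></pre>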
While the physics derived via PPI-NO may not mirror the ground-truth underlying physical laws -- hence the term ``pseudo physics'' -- this approach significantly improves the accuracy of standard operator learning models in data-scarce scenarios, which is evidenced by extensive evaluations across five benchmark tasks and a fatigue modeling application. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02682v1-abstract-full').style.display = 'none'; document.getElementById('2502.02682v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.01846">arXiv:2502.01846</a> <span> [<a href="https://arxiv.org/pdf/2502.01846">pdf</a>, <a href="https://arxiv.org/format/2502.01846">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> UVGS: Reimagining Unstructured 3D Gaussian Splatting using UV Mapping </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Rai%2C+A">Aashish Rai</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+D">Dilin Wang</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+M">Mihir Jain</a>, <a href="/search/cs?searchtype=author&query=Sarafianos%2C+N">Nikolaos Sarafianos</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Kefan Chen</a>, <a href="/search/cs?searchtype=author&query=Sridhar%2C+S">Srinath Sridhar</a>, <a href="/search/cs?searchtype=author&query=Prakash%2C+A">Aayush Prakash</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.01846v2-abstract-short" style="display: inline;"> 3D Gaussian Splatting (3DGS) has demonstrated superior quality in modeling 3D objects and scenes. However, generating 3DGS remains challenging due to their discrete, unstructured, and permutation-invariant nature. In this work, we present a simple yet effective method to overcome these challenges. We utilize spherical mapping to transform 3DGS into a structured 2D representation, termed UVGS. UVGS… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01846v2-abstract-full').style.display = 'inline'; document.getElementById('2502.01846v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.01846v2-abstract-full" style="display: none;"> 3D Gaussian Splatting (3DGS) has demonstrated superior quality in modeling 3D objects and scenes. However, generating 3DGS remains challenging due to their discrete, unstructured, and permutation-invariant nature. In this work, we present a simple yet effective method to overcome these challenges. We utilize spherical mapping to transform 3DGS into a structured 2D representation, termed UVGS. UVGS can be viewed as multi-channel images, with feature dimensions as a concatenation of Gaussian attributes such as position, scale, color, opacity, and rotation. 
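<p class="is-size-7">The attribute stacking in the sentence above lends itself to a compact sketch: map each Gaussian's center to spherical coordinates and scatter its concatenated attributes into a multi-channel UV image. This is a hypothetical nearest-pixel rasterizer for illustration only; the paper's mapping, resolution handling, and collision treatment may differ.</p> <pre><code>
import torch

def uvgs_rasterize(xyz, attrs, H=256, W=512):
    """Toy spherical mapping: scatter per-Gaussian attributes onto a UV grid.
    xyz: (N, 3) Gaussian centers; attrs: (N, C) concatenated attributes
    (position, scale, color, opacity, rotation). Returns a (C, H, W) image."""
    d = torch.nn.functional.normalize(xyz, dim=-1)
    theta = torch.acos(d[:, 1].clamp(-1, 1))         # polar angle in [0, pi]
    phi = torch.atan2(d[:, 2], d[:, 0]) + torch.pi   # azimuth in (0, 2*pi]
    v = (theta / torch.pi * (H - 1)).long()
    u = (phi / (2 * torch.pi) * (W - 1)).long()
    img = torch.zeros(attrs.shape[1], H, W)
    img[:, v, u] = attrs.t()                         # last write wins on collisions
    return img

# Example: 10,000 Gaussians with 14 attribute channels.
img = uvgs_rasterize(torch.randn(10000, 3), torch.randn(10000, 14))
</code></pre>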
We further find that these heterogeneous features can be compressed into a lower-dimensional (e.g., 3-channel) shared feature space using a carefully designed multi-branch network. The compressed UVGS can be treated as typical RGB images. Remarkably, we discover that typical VAEs trained with latent diffusion models can directly generalize to this new representation without additional training. Our novel representation makes it effortless to leverage foundational 2D models, such as diffusion models, to directly model 3DGS. Additionally, one can simply increase the 2D UV resolution to accommodate more Gaussians, making UVGS a scalable solution compared to typical 3D backbones. This approach immediately unlocks various novel generation applications of 3DGS by inherently utilizing the already developed superior 2D generation capabilities. In our experiments, we demonstrate various unconditional, conditional generation, and inpainting applications of 3DGS based on diffusion models, which were previously non-trivial. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01846v2-abstract-full').style.display = 'none'; document.getElementById('2502.01846v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">https://aashishrai3799.github.io/uvgs</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.01600">arXiv:2502.01600</a> <span> [<a href="https://arxiv.org/pdf/2502.01600">pdf</a>, <a href="https://arxiv.org/format/2502.01600">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Reinforcement Learning for Long-Horizon Interactive LLM Agents </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+K">Kevin Chen</a>, <a href="/search/cs?searchtype=author&query=Cusumano-Towner%2C+M">Marco Cusumano-Towner</a>, <a href="/search/cs?searchtype=author&query=Huval%2C+B">Brody Huval</a>, <a href="/search/cs?searchtype=author&query=Petrenko%2C+A">Aleksei Petrenko</a>, <a href="/search/cs?searchtype=author&query=Hamburger%2C+J">Jackson Hamburger</a>, <a href="/search/cs?searchtype=author&query=Koltun%2C+V">Vladlen Koltun</a>, <a href="/search/cs?searchtype=author&query=Kr%C3%A4henb%C3%BChl%2C+P">Philipp Krähenbühl</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.01600v2-abstract-short" style="display: inline;"> Interactive digital agents (IDAs) leverage APIs of stateful digital environments to perform tasks in response to user requests.
While IDAs powered by instruction-tuned large language models (LLMs) can react to feedback from interface invocations in multi-step exchanges, they have not been trained in their respective digital environments. Prior methods accomplish less than half of tasks in sophisti… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01600v2-abstract-full').style.display = 'inline'; document.getElementById('2502.01600v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.01600v2-abstract-full" style="display: none;"> Interactive digital agents (IDAs) leverage APIs of stateful digital environments to perform tasks in response to user requests. While IDAs powered by instruction-tuned large language models (LLMs) can react to feedback from interface invocations in multi-step exchanges, they have not been trained in their respective digital environments. Prior methods accomplish less than half of tasks in sophisticated benchmarks such as AppWorld. We present a reinforcement learning (RL) approach that trains IDAs directly in their target environments. We formalize this training as a partially observable Markov decision process and derive LOOP, a data- and memory-efficient variant of proximal policy optimization. LOOP uses no value network and maintains exactly one copy of the underlying LLM in memory, making its implementation straightforward and as memory-efficient as fine-tuning a single LLM. A 32-billion-parameter agent trained with LOOP in the AppWorld environment outperforms the much larger OpenAI o1 agent by 9 percentage points (15% relative). To our knowledge, this is the first reported application of RL to IDAs that interact with a stateful, multi-domain, multi-app environment via direct API calls. Our analysis sheds light on the effectiveness of RL in this area, showing that the agent learns to consult the API documentation, avoid unwarranted assumptions, minimize confabulation, and recover from setbacks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01600v2-abstract-full').style.display = 'none'; document.getElementById('2502.01600v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
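<p class="is-size-7">The abstract's "no value network, one copy of the LLM" recipe suggests a clipped policy-gradient update whose baseline comes from the sampled returns themselves. The sketch below uses a leave-one-out mean over K rollouts of the same task as that baseline; this particular estimator is an assumption for illustration, not necessarily the paper's exact derivation of LOOP.</p> <pre><code>
import torch

def clipped_pg_loss(logp_new, logp_old, rewards, eps=0.2):
    """PPO-style clipped policy-gradient loss with no value network.
    rewards: (K,) episode returns for K rollouts of one task; each rollout's
    baseline is the mean return of the other K-1 rollouts (leave-one-out,
    an assumed estimator). logp_new/logp_old: (K,) summed token log-probs."""
    K = rewards.numel()
    baseline = (rewards.sum() - rewards) / (K - 1)
    adv = rewards - baseline
    ratio = (logp_new - logp_old).exp()
    unclipped = ratio * adv
    clipped = ratio.clamp(1 - eps, 1 + eps) * adv
    return -torch.minimum(unclipped, clipped).mean()

loss = clipped_pg_loss(torch.randn(8), torch.randn(8), torch.rand(8))
</code></pre>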
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.01181">arXiv:2502.01181</a> <span> [<a href="https://arxiv.org/pdf/2502.01181">pdf</a>, <a href="https://arxiv.org/format/2502.01181">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> BVINet: Unlocking Blind Video Inpainting with Zero Annotations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wu%2C+Z">Zhiliang Wu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Kerui Chen</a>, <a href="/search/cs?searchtype=author&query=Li%2C+K">Kun Li</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+H">Hehe Fan</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Y">Yi Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.01181v1-abstract-short" style="display: inline;"> Video inpainting aims to fill in corrupted regions of the video with plausible contents. Existing methods generally assume that the locations of corrupted regions are known, focusing primarily on the "how to inpaint". This reliance necessitates manual annotation of the corrupted regions using binary masks to indicate "where to inpaint". However, the annotation of these masks is labor-intensive and… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01181v1-abstract-full').style.display = 'inline'; document.getElementById('2502.01181v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.01181v1-abstract-full" style="display: none;"> Video inpainting aims to fill in corrupted regions of the video with plausible contents. Existing methods generally assume that the locations of corrupted regions are known, focusing primarily on the "how to inpaint". This reliance necessitates manual annotation of the corrupted regions using binary masks to indicate "where to inpaint". However, the annotation of these masks is labor-intensive and expensive, limiting the practicality of current methods. In this paper, we expect to relax this assumption by defining a new blind video inpainting setting, enabling the networks to learn the mapping from corrupted video to inpainted result directly, eliminating the need for corrupted region annotations. Specifically, we propose an end-to-end blind video inpainting network (BVINet) to address both "where to inpaint" and "how to inpaint" simultaneously. On the one hand, BVINet can predict the masks of corrupted regions by detecting semantic-discontinuous regions of the frame and utilizing the temporal consistency prior of the video. On the other hand, the predicted masks are incorporated into the BVINet, allowing it to capture valid context information from uncorrupted regions to fill in corrupted ones. Besides, we introduce a consistency loss to regularize the training parameters of BVINet. In this way, mask prediction and video completion mutually constrain each other, thereby maximizing the overall performance of the trained model. Furthermore, we customize a dataset consisting of synthetic corrupted videos, real-world corrupted videos, and their corresponding completed videos.
This dataset serves as a valuable resource for advancing blind video inpainting research. Extensive experimental results demonstrate the effectiveness and superiority of our method. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01181v1-abstract-full').style.display = 'none'; document.getElementById('2502.01181v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.01116">arXiv:2502.01116</a> <span> [<a href="https://arxiv.org/pdf/2502.01116">pdf</a>, <a href="https://arxiv.org/format/2502.01116">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Picky LLMs and Unreliable RMs: An Empirical Study on Safety Alignment after Instruction Tuning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+G">Guanlin Li</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Kangjie Chen</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+S">Shangwei Guo</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jie Zhang</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+H">Han Qiu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+C">Chao Zhang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+G">Guoyin Wang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+T">Tianwei Zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Jiwei Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.01116v1-abstract-short" style="display: inline;"> Large language models (LLMs) have emerged as powerful tools for addressing a wide range of general inquiries and tasks. Despite this, fine-tuning aligned LLMs on smaller, domain-specific datasets, critical to adapting them to specialized tasks, can inadvertently degrade their safety alignment, even when the datasets are benign. This phenomenon makes models more susceptible to providing inappropria… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01116v1-abstract-full').style.display = 'inline'; document.getElementById('2502.01116v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.01116v1-abstract-full" style="display: none;"> Large language models (LLMs) have emerged as powerful tools for addressing a wide range of general inquiries and tasks. Despite this, fine-tuning aligned LLMs on smaller, domain-specific datasets, critical to adapting them to specialized tasks, can inadvertently degrade their safety alignment, even when the datasets are benign. This phenomenon makes models more susceptible to providing inappropriate responses. 
In this study, we systematically examine the factors contributing to safety alignment degradation in benign fine-tuning scenarios. Our analysis identifies three critical factors affecting aligned LLMs: answer structure, identity calibration, and role-play. Additionally, we evaluate the reliability of state-of-the-art reward models (RMs), which are often used to guide alignment processes. Our findings reveal that these RMs frequently fail to accurately reflect human preferences regarding safety, underscoring their limitations in practical applications. By uncovering these challenges, our work highlights the complexities of maintaining safety alignment during fine-tuning and offers guidance to help developers balance utility and safety in LLMs. Datasets and fine-tuning code used in our experiments can be found in https://github.com/GuanlinLee/llm_instruction_tuning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01116v1-abstract-full').style.display = 'none'; document.getElementById('2502.01116v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00847">arXiv:2502.00847</a> <span> [<a href="https://arxiv.org/pdf/2502.00847">pdf</a>, <a href="https://arxiv.org/format/2502.00847">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> SecPE: Secure Prompt Ensembling for Private and Robust Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jiawen Zhang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Kejia Chen</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+Z">Zunlei Feng</a>, <a href="/search/cs?searchtype=author&query=Lou%2C+J">Jian Lou</a>, <a href="/search/cs?searchtype=author&query=Song%2C+M">Mingli Song</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jian Liu</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+X">Xiaohu Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.00847v1-abstract-short" style="display: inline;"> With the growing popularity of LLMs among the general public users, privacy-preserving and adversarial robustness have become two pressing demands for LLM-based services, which have largely been pursued separately but rarely jointly. 
In this paper, to the best of our knowledge, we make one of the first attempts towards robust and private LLM inference by tightly integrating two disconnected fields:… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00847v1-abstract-full').style.display = 'inline'; document.getElementById('2502.00847v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.00847v1-abstract-full" style="display: none;"> With the growing popularity of LLMs among the general public users, privacy-preserving and adversarial robustness have become two pressing demands for LLM-based services, which have largely been pursued separately but rarely jointly. In this paper, to the best of our knowledge, we make one of the first attempts towards robust and private LLM inference by tightly integrating two disconnected fields: private inference and prompt ensembling. The former protects users' privacy by encrypting inference data transmitted and processed by LLMs, while the latter enhances adversarial robustness by yielding an aggregated output from multiple prompted LLM responses. Although each is widely recognized as effective individually, combining private inference with prompt ensembling entails new challenges that render the naive combination of existing techniques inefficient. To overcome the hurdles, we propose SecPE, which designs efficient fully homomorphic encryption (FHE) counterparts for the core algorithmic building blocks of prompt ensembling. We conduct extensive experiments on 8 tasks to evaluate the accuracy, robustness, and efficiency of SecPE. The results show that SecPE maintains high clean accuracy and offers better robustness at the expense of merely $2.5\%$ efficiency overhead compared to baseline private inference methods, indicating a satisfactory ``accuracy-robustness-efficiency'' tradeoff. For the efficiency of the encrypted Argmax operation that incurs major slowdown for prompt ensembling, SecPE is 35.4x faster than the state-of-the-art peers, which can be of independent interest beyond this work. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00847v1-abstract-full').style.display = 'none'; document.getElementById('2502.00847v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025.
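<p class="is-size-7">For readers unfamiliar with the building block SecPE encrypts, the cleartext logic of prompt ensembling is just vote aggregation followed by an Argmax. The snippet below shows only that plaintext logic as a point of reference; SecPE's contribution is performing the aggregation and Argmax under fully homomorphic encryption, which this sketch does not attempt.</p> <pre><code>
from collections import Counter

def ensemble_answer(responses):
    """Plaintext prompt ensembling: aggregate answers from multiple prompted
    runs and return the majority answer (an Argmax over vote counts)."""
    votes = Counter(responses)
    return max(votes.items(), key=lambda kv: kv[1])[0]

print(ensemble_answer(["Paris", "Paris", "London", "Paris"]))  # Paris
</code></pre>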
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00840">arXiv:2502.00840</a> <span> [<a href="https://arxiv.org/pdf/2502.00840">pdf</a>, <a href="https://arxiv.org/format/2502.00840">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Activation Approximations Can Incur Safety Vulnerabilities Even in Aligned LLMs: Comprehensive Analysis and Defense </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jiawen Zhang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Kejia Chen</a>, <a href="/search/cs?searchtype=author&query=He%2C+L">Lipeng He</a>, <a href="/search/cs?searchtype=author&query=Lou%2C+J">Jian Lou</a>, <a href="/search/cs?searchtype=author&query=Li%2C+D">Dan Li</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+Z">Zunlei Feng</a>, <a href="/search/cs?searchtype=author&query=Song%2C+M">Mingli Song</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jian Liu</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+K">Kui Ren</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+X">Xiaohu Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.00840v1-abstract-short" style="display: inline;"> Large Language Models (LLMs) have showcased remarkable capabilities across various domains. Accompanying the evolving capabilities and expanding deployment scenarios of LLMs, their deployment challenges escalate due to their sheer scale and the advanced yet complex activation designs prevalent in notable model series, such as Llama, Gemma, and Mistral. These challenges have become particularly pro… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00840v1-abstract-full').style.display = 'inline'; document.getElementById('2502.00840v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.00840v1-abstract-full" style="display: none;"> Large Language Models (LLMs) have showcased remarkable capabilities across various domains. Accompanying the evolving capabilities and expanding deployment scenarios of LLMs, their deployment challenges escalate due to their sheer scale and the advanced yet complex activation designs prevalent in notable model series, such as Llama, Gemma, and Mistral. These challenges have become particularly pronounced in resource-constrained deployment scenarios, where mitigating inference efficiency bottlenecks is imperative. Among various recent efforts, activation approximation has emerged as a promising avenue for pursuing inference efficiency, sometimes considered indispensable in applications such as private inference. Despite achieving substantial speedups with minimal impact on utility, even appearing sound and practical for real-world deployment, the safety implications of activation approximations remain unclear. In this work, we fill this critical gap in LLM safety by conducting the first systematic safety evaluation of activation approximations. 
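<p class="is-size-7">As background for the evaluation described here: an activation approximation typically replaces a transcendental activation such as GELU with a cheaper surrogate, for example a low-degree polynomial, which is what makes settings like private inference tractable. The fit below is a generic toy example of the technique, not one of the specific methods the paper vets.</p> <pre><code>
import numpy as np

# Toy activation approximation: fit GELU with a degree-4 polynomial on [-4, 4].
x = np.linspace(-4, 4, 2001)
gelu = 0.5 * x * (1 + np.tanh(np.sqrt(2 / np.pi) * (x + 0.044715 * x**3)))
coeffs = np.polyfit(x, gelu, deg=4)
approx = np.polyval(coeffs, x)
print("max abs error on [-4, 4]:", np.abs(approx - gelu).max())
</code></pre>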
Our safety vetting spans seven state-of-the-art techniques across three popular categories, revealing consistent safety degradation across ten safety-aligned LLMs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00840v1-abstract-full').style.display = 'none'; document.getElementById('2502.00840v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">19 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00340">arXiv:2502.00340</a> <span> [<a href="https://arxiv.org/pdf/2502.00340">pdf</a>, <a href="https://arxiv.org/format/2502.00340">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> Enhancing Token Filtering Efficiency in Large Language Model Training with Collider </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chai%2C+D">Di Chai</a>, <a href="/search/cs?searchtype=author&query=Li%2C+P">Pengbo Li</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+F">Feiyuan Zhang</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+Y">Yilun Jin</a>, <a href="/search/cs?searchtype=author&query=Tian%2C+H">Han Tian</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Junxue Zhang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Kai Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.00340v1-abstract-short" style="display: inline;"> Token filtering has been proposed to enhance utility of large language models (LLMs) by eliminating inconsequential tokens during training. While using fewer tokens should reduce computational workloads, existing studies have not succeeded in achieving higher efficiency. This is primarily due to the insufficient sparsity caused by filtering tokens only in the output layers, as well as inefficient… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00340v1-abstract-full').style.display = 'inline'; document.getElementById('2502.00340v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.00340v1-abstract-full" style="display: none;"> Token filtering has been proposed to enhance utility of large language models (LLMs) by eliminating inconsequential tokens during training. While using fewer tokens should reduce computational workloads, existing studies have not succeeded in achieving higher efficiency.
This is primarily due to the insufficient sparsity caused by filtering tokens only in the output layers, as well as inefficient sparse GEMM (General Matrix Multiplication), even when sufficient sparsity is present. This paper presents Collider, a system unleashing the full efficiency of token filtering in LLM training. At its core, Collider filters activations of inconsequential tokens across all layers to maintain sparsity. Additionally, it features an automatic workflow that transforms sparse GEMM into dimension-reduced dense GEMM for optimized efficiency. Evaluations on three LLMs (TinyLlama-1.1B, Qwen2.5-1.5B, and Phi1.5-1.4B) demonstrate that Collider reduces backpropagation time by up to 35.1% and end-to-end training time by up to 22.0% when filtering 40% of tokens. Utility assessments of training TinyLlama on 15B tokens indicate that Collider sustains the utility advancements of token filtering by relatively improving model utility by 16.3% compared to regular training, and reduces training time from 4.7 days to 3.5 days using 8 GPUs. Collider is designed for easy integration into existing LLM training frameworks, allowing systems already using token filtering to accelerate training with just one line of code. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00340v1-abstract-full').style.display = 'none'; document.getElementById('2502.00340v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.19051">arXiv:2501.19051</a> <span> [<a href="https://arxiv.org/pdf/2501.19051">pdf</a>, <a href="https://arxiv.org/format/2501.19051">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> Swift: Rethinking RDMA Control Plane for Elastic Computing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Junxue Zhang</a>, <a href="/search/cs?searchtype=author&query=Tian%2C+H">Han Tian</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+X">Xinyang Huang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+W">Wenxue Li</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+K">Kaiqiang Xu</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+D">Dian Shen</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yong Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Kai Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.19051v1-abstract-short" style="display: inline;"> Elastic computing enables dynamic scaling to meet workload demands, and Remote Direct Memory Access (RDMA) enhances this by providing high-throughput, low-latency network communication. However, integrating RDMA into elastic computing remains a challenge, particularly in control plane operations for RDMA connection setup.
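<p class="is-size-7">The sparse-to-dense transformation in the Collider abstract above reduces, at its core, to gathering the surviving tokens' activations and running a smaller dense GEMM. The sketch below shows that equivalence on toy shapes; Collider's actual kernel rewriting and workflow are of course far more involved.</p> <pre><code>
import torch

B, D, H = 1024, 2048, 2048            # tokens, input dim, output dim
x = torch.randn(B, D)                 # activations for B tokens
W = torch.randn(D, H)
keep = torch.rand(B) > 0.4            # filter roughly 40% of tokens

# Sparse view: zero out filtered tokens but still multiply everything.
y_sparse = (x * keep.unsqueeze(1).float()) @ W

# Dimension-reduced dense view: gather survivors, run a smaller dense GEMM.
idx = keep.nonzero(as_tuple=True)[0]
y_dense = torch.zeros(B, H)
y_dense[idx] = x[idx] @ W

print(torch.allclose(y_sparse, y_dense, atol=1e-3))  # same result, less work
</code></pre>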
This paper revisits the assumptions of prior work on high-performance RDMA… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.19051v1-abstract-full').style.display = 'inline'; document.getElementById('2501.19051v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.19051v1-abstract-full" style="display: none;"> Elastic computing enables dynamic scaling to meet workload demands, and Remote Direct Memory Access (RDMA) enhances this by providing high-throughput, low-latency network communication. However, integrating RDMA into elastic computing remains a challenge, particularly in control plane operations for RDMA connection setup. This paper revisits the assumptions of prior work on high-performance RDMA for elastic computing, and reveals that extreme microsecond-level control plane optimizations are often unnecessary. By challenging the conventional beliefs on the slowness of user-space RDMA control plane and the difficulty of user-space RDMA resource sharing, we uncover new design opportunities. Our key insight is that user-space RDMA connection setup can be significantly improved with caching, while RDMA resources can be efficiently shared among processes using fork. In light of this, we propose Swift, a simple yet effective solution that co-designs RDMA with a serverless framework to optimize performance for elastic computing. At its very core, Swift handles cold and warm serverless requests by swiftly initializing the RDMA control plane with cache-optimized libibverbs, and manages fork requests by leveraging the RDMA's fork capability. Implemented with OpenWhisk, Swift delivers 30.56-46.50% higher average throughput and 18.55-37.21% lower latency, at a cost of 6.5% control plane overhead, compared to prior solutions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.19051v1-abstract-full').style.display = 'none'; document.getElementById('2501.19051v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
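<p class="is-size-7">Swift's key insight, that user-space connection setup becomes cheap once its repeatable steps are cached, can be conveyed with a deliberately generic memoization sketch. The function below is a hypothetical stand-in, not the libibverbs API; it only demonstrates the cold-versus-warm control-plane pattern.</p> <pre><code>
import functools

@functools.lru_cache(maxsize=None)
def setup_rdma_context(device: str):
    """Hypothetical stand-in for expensive RDMA control-plane setup."""
    print(f"cold path: opening {device}, allocating resources ...")  # runs once
    return {"device": device, "pd": object(), "cq": object()}

ctx1 = setup_rdma_context("mlx5_0")   # cold request: performs full setup
ctx2 = setup_rdma_context("mlx5_0")   # warm request: served from the cache
assert ctx1 is ctx2
</code></pre>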
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.18371">arXiv:2501.18371</a> <span> [<a href="https://arxiv.org/pdf/2501.18371">pdf</a>, <a href="https://arxiv.org/format/2501.18371">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> FLASH-FHE: A Heterogeneous Architecture for Fully Homomorphic Encryption Acceleration </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Junxue Zhang</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+X">Xiaodian Cheng</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+G">Gang Cao</a>, <a href="/search/cs?searchtype=author&query=Dai%2C+M">Meng Dai</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+Y">Yijun Sun</a>, <a href="/search/cs?searchtype=author&query=Tian%2C+H">Han Tian</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+D">Dian Shen</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yong Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Kai Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.18371v1-abstract-short" style="display: inline;"> While many hardware accelerators have recently been proposed to address the inefficiency problem of fully homomorphic encryption (FHE) schemes, none of them is able to deliver optimal performance when facing real-world FHE workloads consisting of a mixture of shallow and deep computations, due primarily to their homogeneous design principle. This paper presents FLASH-FHE, the first FHE accelerat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18371v1-abstract-full').style.display = 'inline'; document.getElementById('2501.18371v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.18371v1-abstract-full" style="display: none;"> While many hardware accelerators have recently been proposed to address the inefficiency problem of fully homomorphic encryption (FHE) schemes, none of them is able to deliver optimal performance when facing real-world FHE workloads consisting of a mixture of shallow and deep computations, due primarily to their homogeneous design principle. This paper presents FLASH-FHE, the first FHE accelerator with a heterogeneous architecture for mixed workloads. At its heart, FLASH-FHE designs two types of computation clusters, i.e., bootstrappable and swift, to optimize for deep and shallow workloads respectively in terms of cryptographic parameters and hardware pipelines. We organize one bootstrappable and two swift clusters into one cluster affiliation, and present a scheduling scheme that provides sufficient acceleration for deep FHE workloads by utilizing all the affiliations, while improving parallelism for shallow FHE workloads by assigning one shallow workload per affiliation and dynamically decomposing the bootstrappable cluster into multiple swift pipelines to accelerate the assigned workload.
We further show that these two types of clusters can share valuable on-chip memory, improving performance without significant resource consumption. We implement FLASH-FHE with RTL and synthesize it using both 7nm and 14/12nm technology nodes, and our experiment results demonstrate that FLASH-FHE achieves an average performance improvement of $1.4\times$ and $11.2\times$ compared to state-of-the-art FHE accelerators CraterLake and F1 for deep workloads, while delivering up to $8.0\times$ speedup for shallow workloads due to its heterogeneous architecture. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18371v1-abstract-full').style.display = 'none'; document.getElementById('2501.18371v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.18122">arXiv:2501.18122</a> <span> [<a href="https://arxiv.org/pdf/2501.18122">pdf</a>, <a href="https://arxiv.org/format/2501.18122">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> VQLTI: Long-Term Tropical Cyclone Intensity Forecasting with Physical Constraints </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xinyu Wang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+L">Lei Liu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Kang Chen</a>, <a href="/search/cs?searchtype=author&query=Han%2C+T">Tao Han</a>, <a href="/search/cs?searchtype=author&query=Li%2C+B">Bin Li</a>, <a href="/search/cs?searchtype=author&query=Bai%2C+L">Lei Bai</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.18122v1-abstract-short" style="display: inline;"> Tropical cyclone (TC) intensity forecasting is crucial for early disaster warning and emergency decision-making. Numerous researchers have explored deep-learning methods to address computational and post-processing issues in operational forecasting. Regrettably, they exhibit subpar long-term forecasting capabilities. We use two strategies to enhance long-term forecasting. (1) By enhancing the matc… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18122v1-abstract-full').style.display = 'inline'; document.getElementById('2501.18122v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.18122v1-abstract-full" style="display: none;"> Tropical cyclone (TC) intensity forecasting is crucial for early disaster warning and emergency decision-making. Numerous researchers have explored deep-learning methods to address computational and post-processing issues in operational forecasting. Regrettably, they exhibit subpar long-term forecasting capabilities. We use two strategies to enhance long-term forecasting. 
(1) By enhancing the matching between TC intensity and spatial information, we can improve long-term forecasting performance. (2) Incorporating physical knowledge and physical constraints can help mitigate the accumulation of forecasting errors. To achieve the above strategies, we propose the VQLTI framework. VQLTI transfers the TC intensity information to a discrete latent space while retaining the spatial information differences, using large-scale spatial meteorological data as conditions. Furthermore, we leverage the forecast from the weather prediction model FengWu to provide additional physical knowledge for VQLTI. Additionally, we calculate the potential intensity (PI) to impose physical constraints on the latent variables. In global long-term TC intensity forecasting, VQLTI achieves state-of-the-art results for lead times of 24h to 120h, with the MSW (Maximum Sustained Wind) forecast error reduced by 35.65%-42.51% compared to ECMWF-IFS. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18122v1-abstract-full').style.display = 'none'; document.getElementById('2501.18122v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.17819">arXiv:2501.17819</a> <span> [<a href="https://arxiv.org/pdf/2501.17819">pdf</a>, <a href="https://arxiv.org/format/2501.17819">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3706598.3713405">10.1145/3706598.3713405 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> eaSEL: Promoting Social-Emotional Learning and Parent-Child Interaction through AI-Mediated Content Consumption </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shen%2C+J">Jocelyn Shen</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+J+K">Jennifer King Chen</a>, <a href="/search/cs?searchtype=author&query=Findlater%2C+L">Leah Findlater</a>, <a href="/search/cs?searchtype=author&query=Smith%2C+G+D">Griffin Dietz Smith</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.17819v2-abstract-short" style="display: inline;"> As children increasingly consume media on devices, parents look for ways this usage can support learning and growth, especially in domains like social-emotional learning.
We introduce eaSEL, a system that (a) integrates social-emotional learning (SEL) curricula into children's video consumption by generating reflection activities and (b) facilitates parent-child discussions around digital media wi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.17819v2-abstract-full').style.display = 'inline'; document.getElementById('2501.17819v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.17819v2-abstract-full" style="display: none;"> As children increasingly consume media on devices, parents look for ways this usage can support learning and growth, especially in domains like social-emotional learning. We introduce eaSEL, a system that (a) integrates social-emotional learning (SEL) curricula into children's video consumption by generating reflection activities and (b) facilitates parent-child discussions around digital media without requiring co-consumption of videos. We present a technical evaluation of our system's ability to detect social-emotional moments within a transcript and to generate high-quality SEL-based activities for both children and parents. Through a user study with N=20 parent-child dyads, we find that after completing an eaSEL activity, children reflect more on the emotional content of videos. Furthermore, parents find that the tool promotes meaningful active engagement and could scaffold deeper conversations around content. Our work paves the way for AI to support children's social-emotional reflection on media and family connections in the digital age. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.17819v2-abstract-full').style.display = 'none'; document.getElementById('2501.17819v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ACM Conference on Human Factors in Computing Systems (CHI) 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.17466">arXiv:2501.17466</a> <span> [<a href="https://arxiv.org/pdf/2501.17466">pdf</a>, <a href="https://arxiv.org/format/2501.17466">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> Proteus: Achieving High-Performance Processing-Using-DRAM via Dynamic Precision Bit-Serial Arithmetic </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Oliveira%2C+G+F">Geraldo F.
Oliveira</a>, <a href="/search/cs?searchtype=author&query=Kabra%2C+M">Mayank Kabra</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+Y">Yuxin Guo</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Kangqi Chen</a>, <a href="/search/cs?searchtype=author&query=Ya%C4%9Fl%C4%B1k%C3%A7%C4%B1%2C+A+G">A. Giray Yağlıkçı</a>, <a href="/search/cs?searchtype=author&query=Soysal%2C+M">Melina Soysal</a>, <a href="/search/cs?searchtype=author&query=Sadrosadati%2C+M">Mohammad Sadrosadati</a>, <a href="/search/cs?searchtype=author&query=Bueno%2C+J+O">Joaquin Olivares Bueno</a>, <a href="/search/cs?searchtype=author&query=Ghose%2C+S">Saugata Ghose</a>, <a href="/search/cs?searchtype=author&query=G%C3%B3mez-Luna%2C+J">Juan Gómez-Luna</a>, <a href="/search/cs?searchtype=author&query=Mutlu%2C+O">Onur Mutlu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.17466v1-abstract-short" style="display: inline;"> Processing-using-DRAM (PUD) is a paradigm where the analog operational properties of DRAM structures are used to perform bulk logic operations. While PUD promises high throughput at low energy and area cost, we uncover three limitations of existing PUD approaches that lead to significant inefficiencies: (i) static data representation, i.e., 2's complement with fixed bit-precision, leading to unnec… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.17466v1-abstract-full').style.display = 'inline'; document.getElementById('2501.17466v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.17466v1-abstract-full" style="display: none;"> Processing-using-DRAM (PUD) is a paradigm where the analog operational properties of DRAM structures are used to perform bulk logic operations. While PUD promises high throughput at low energy and area cost, we uncover three limitations of existing PUD approaches that lead to significant inefficiencies: (i) static data representation, i.e., 2's complement with fixed bit-precision, leading to unnecessary computation over useless (i.e., inconsequential) data; (ii) support for only throughput-oriented execution, where the high latency of individual PUD operations can only be hidden in the presence of bulk data-level parallelism; and (iii) high latency for high-precision (e.g., 32-bit) operations. To address these issues, we propose Proteus, which builds on two key ideas. First, Proteus parallelizes the execution of independent primitives in a PUD operation by leveraging DRAM's internal parallelism. Second, Proteus reduces the bit-precision for PUD operations by leveraging narrow values (i.e., values with many leading zeros). We compare Proteus to different state-of-the-art computing platforms (CPU, GPU, and the SIMDRAM PUD architecture) for twelve real-world applications. Using a single DRAM bank, Proteus provides (i) 17x, 7.3x, and 10.2x the performance per mm²; and (ii) 90.3x, 21x, and 8.1x lower energy consumption than that of the CPU, GPU, and SIMDRAM, respectively, on average across twelve real-world applications. Proteus incurs low area cost on top of a DRAM chip (1.6%) and CPU die (0.03%).
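<p class="is-size-7">The narrow-value observation above has a one-line arithmetic core: a value's required bit-width is the position of its highest set bit, so a batch of small values can be processed at far fewer than 32 bits. The toy model below illustrates just that counting step; Proteus's dynamic-precision execution inside DRAM is, of course, much more elaborate.</p> <pre><code>
def required_bits(values):
    """Smallest unsigned bit-width covering every value in the batch."""
    return max(v.bit_length() for v in values) or 1

batch = [3, 17, 9, 1]        # values with many leading zeros in 32-bit form
print(required_bits(batch))  # 5 bits suffice instead of a fixed 32
</code></pre>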
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.17428">arXiv:2501.17428</a> <span> [<a href="https://arxiv.org/pdf/2501.17428">pdf</a>, <a href="https://arxiv.org/format/2501.17428">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Performance">cs.PF</span> </div> </div> <p class="title is-5 mathjax"> WCDT: Systematic WCET Optimization for Decision Tree Implementations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=H%C3%B6lscher%2C+N">Nils Hölscher</a>, <a href="/search/cs?searchtype=author&query=Hakert%2C+C">Christian Hakert</a>, <a href="/search/cs?searchtype=author&query=von+der+Br%C3%BCggen%2C+G">Georg von der Brüggen</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+J">Jian-Jia Chen</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Kuan-Hsun Chen</a>, <a href="/search/cs?searchtype=author&query=Reineke%2C+J">Jan Reineke</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2501.17428v1-abstract-full"> Machine-learning models are increasingly deployed on resource-constrained embedded systems with strict timing constraints. In such scenarios, the worst-case execution time (WCET) of the models is required to ensure safe operation. Specifically, decision trees are a prominent class of machine-learning models and the main building blocks of tree-based ensemble models (e.g., random forests), which are commonly employed in resource-constrained embedded systems. In this paper, we develop a systematic approach for WCET optimization of decision tree implementations. To this end, we introduce a linear surrogate model that estimates the execution time of individual paths through a decision tree based on the path's length and the number of taken branches. We provide an optimization algorithm that constructively builds a WCET-optimal implementation of a given decision tree with respect to this surrogate model.
We experimentally evaluate both the surrogate model and the WCET-optimization algorithm. The evaluation shows that the optimization algorithm improves the analytically determined WCET by up to $17\%$ compared to an unoptimized implementation. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.17186">arXiv:2501.17186</a> <span> [<a href="https://arxiv.org/pdf/2501.17186">pdf</a>, <a href="https://arxiv.org/format/2501.17186">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Complete Chess Games Enable LLM Become A Chess Master </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yinqi Zhang</a>, <a href="/search/cs?searchtype=author&query=Han%2C+X">Xintian Han</a>, <a href="/search/cs?searchtype=author&query=Li%2C+H">Haolong Li</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Kedi Chen</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+S">Shaohui Lin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2501.17186v2-abstract-full"> Large language models (LLMs) have shown remarkable abilities in text generation, question answering, language translation, reasoning, and many other tasks. They continue to advance rapidly and are becoming increasingly influential in various fields, from technology and business to education and entertainment. Despite LLMs' success in multiple areas, their ability to play abstract games, such as chess, is underexplored. Chess-playing requires the language models to output legal and reasonable moves from textual inputs. Here, we propose the large language model ChessLLM to play full chess games. We transform the game into a textual format with the best move represented in the Forsyth-Edwards Notation.
We show that with simple supervised fine-tuning, our model achieves a professional-level Elo rating of 1788 in matches against the standard Elo-rated Stockfish when permitted to sample 10 times. We further show that data quality is important: long-round data supervision enjoys a 350 Elo rating improvement over short-round data. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NAACL 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.15907">arXiv:2501.15907</a> <span> [<a href="https://arxiv.org/pdf/2501.15907">pdf</a>, <a href="https://arxiv.org/format/2501.15907">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Emilia: A Large-Scale, Extensive, Multilingual, and Diverse Dataset for Speech Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=He%2C+H">Haorui He</a>, <a href="/search/cs?searchtype=author&query=Shang%2C+Z">Zengqiang Shang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+C">Chaoren Wang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xuyuan Li</a>, <a href="/search/cs?searchtype=author&query=Gu%2C+Y">Yicheng Gu</a>, <a href="/search/cs?searchtype=author&query=Hua%2C+H">Hua Hua</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+L">Liwei Liu</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+C">Chen Yang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Jiaqi Li</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+P">Peiyang Shi</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yuancheng Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Kai Chen</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+P">Pengyuan Zhang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z">Zhizheng Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>:
<span class="abstract-full has-text-grey-dark mathjax" id="2501.15907v1-abstract-full"> Recent advancements in speech generation have been driven by large-scale training datasets. However, current models fall short of capturing the spontaneity and variability inherent in real-world human speech, due to their reliance on audiobook datasets limited to formal read-aloud speech styles. To bridge this gap, we introduce Emilia-Pipe, an open-source preprocessing pipeline to extract high-quality training data from valuable yet underexplored in-the-wild data that capture spontaneous human speech in real-world contexts. By leveraging Emilia-Pipe, we construct Emilia, the first multilingual speech generation dataset derived from in-the-wild speech data. This dataset comprises over 101k hours of speech across six languages: English, Chinese, German, French, Japanese, and Korean. In addition, we expand Emilia to Emilia-Large, a dataset exceeding 216k hours, making it the largest open-source speech generation dataset available. Extensive experiments demonstrate that Emilia significantly outperforms traditional audiobook datasets in generating spontaneous and human-like speech, showcasing superior performance in capturing the diverse speaker timbre and speaking styles of real-world human speech. Furthermore, this work underscores the importance of scaling dataset size to advance speech generation research and validates the effectiveness of Emilia for both multilingual and crosslingual speech generation. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p>
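<p class="is-size-7">As a loose illustration of one stage that an in-the-wild preprocessing pipeline such as Emilia-Pipe has to perform, the sketch below segments raw audio by short-time energy. Real pipelines rely on trained VAD, diarization, and quality-filtering models; this toy NumPy version, with invented function names, only conveys the shape of the step and is not Emilia-Pipe's actual API.</p> <pre><code>
# Toy stand-in for the segmentation step of an in-the-wild speech pipeline:
# keep only spans whose short-time RMS energy suggests active speech.
import numpy as np

def energy_segments(wave, sr, frame_ms=30, threshold=0.02):
    """Return (start_sec, end_sec) spans whose frame RMS exceeds threshold."""
    frame = int(sr * frame_ms / 1000)
    n = len(wave) // frame
    rms = np.sqrt((wave[: n * frame].reshape(n, frame) ** 2).mean(axis=1))
    active = rms > threshold
    spans, start = [], None
    for i, on in enumerate(active):
        if on and start is None:
            start = i
        elif not on and start is not None:
            spans.append((start * frame / sr, i * frame / sr))
            start = None
    if start is not None:
        spans.append((start * frame / sr, n * frame / sr))
    return spans

sr = 16000
t = np.linspace(0, 2, 2 * sr, endpoint=False)
# one second of a 220 Hz tone standing in for speech, then near-silence
wave = np.where(t >= 1.0, 0.001, 0.1 * np.sin(2 * np.pi * 220 * t))
print(energy_segments(wave, sr))   # roughly [(0.0, ~1.0)]
</code></pre>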
<p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Extended version of arXiv:2407.05361, submitted to TASLP; dataset is available at: https://huggingface.co/datasets/amphion/Emilia-Dataset</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.15478">arXiv:2501.15478</a> <span> [<a href="https://arxiv.org/pdf/2501.15478">pdf</a>, <a href="https://arxiv.org/format/2501.15478">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> LoRAGuard: An Effective Black-box Watermarking Approach for LoRAs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lv%2C+P">Peizhuo Lv</a>, <a href="/search/cs?searchtype=author&query=Xiahou%2C+Y">Yiran Xiahou</a>, <a href="/search/cs?searchtype=author&query=Li%2C+C">Congyi Li</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+M">Mengjie Sun</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Shengzhi Zhang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Kai Chen</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yingjun Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2501.15478v1-abstract-full"> LoRA (Low-Rank Adaptation) has achieved remarkable success in the parameter-efficient fine-tuning of large models. The trained LoRA matrix can be integrated with the base model through addition or negation operations to improve performance on downstream tasks. However, the unauthorized use of LoRAs to generate harmful content highlights the need for effective mechanisms to trace their usage. A natural solution is to embed watermarks into LoRAs to detect unauthorized misuse. However, existing methods struggle when multiple LoRAs are combined or the negation operation is applied, as these can significantly degrade watermark performance. In this paper, we introduce LoRAGuard, a novel black-box watermarking technique for detecting unauthorized misuse of LoRAs. To support both addition and negation operations, we propose the Yin-Yang watermark technique, where the Yin watermark is verified under the negation operation and the Yang watermark under the addition operation.
Additionally, we propose a shadow-model-based watermark training approach that significantly improves effectiveness in scenarios involving multiple integrated LoRAs. Extensive experiments on both language and diffusion models show that LoRAGuard achieves nearly 100% watermark verification success. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.15079">arXiv:2501.15079</a> <span> [<a href="https://arxiv.org/pdf/2501.15079">pdf</a>, <a href="https://arxiv.org/format/2501.15079">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Methodology">stat.ME</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Applications">stat.AP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Salvaging Forbidden Treasure in Medical Data: Utilizing Surrogate Outcomes and Single Records for Rare Event Modeling </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yin%2C+X">Xiaohui Yin</a>, <a href="/search/cs?searchtype=author&query=Sacco%2C+S">Shane Sacco</a>, <a href="/search/cs?searchtype=author&query=Aseltine%2C+R+H">Robert H. Aseltine</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+F">Fei Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Kun Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2501.15079v1-abstract-full"> The vast repositories of Electronic Health Records (EHR) and medical claims hold untapped potential for studying rare but critical events, such as suicide attempts. Conventional setups often model suicide attempt as a univariate outcome and also exclude any "single-record" patients with a single documented encounter due to a lack of historical information.
However, patients who were diagnosed with suicide attempts at their only encounter can, perhaps surprisingly, represent a substantial proportion of all attempt cases in the data, as high as 70–80%. We develop a hybrid, integrative learning framework to leverage concurrent outcomes as surrogates and harness the forbidden yet precious information from single-record data. Our approach employs a supervised learning component to learn the latent variables that connect primary (e.g., suicide) and surrogate outcomes (e.g., mental disorders) to historical information. It simultaneously employs an unsupervised learning component to utilize the single-record data through the shared latent variables. As such, our approach offers a general strategy for information integration that is crucial to modeling rare conditions and events. With hospital inpatient data from Connecticut, we demonstrate that single-record data and concurrent diagnoses indeed carry valuable information, and that utilizing them can substantially improve suicide risk modeling. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.15043">arXiv:2501.15043</a> <span> [<a href="https://arxiv.org/pdf/2501.15043">pdf</a>, <a href="https://arxiv.org/format/2501.15043">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Prompt-Aware Controllable Shadow Removal </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+K">Kerui Chen</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z">Zhiliang Wu</a>, <a href="/search/cs?searchtype=author&query=Hou%2C+W">Wenjin Hou</a>, <a href="/search/cs?searchtype=author&query=Li%2C+K">Kun Li</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+H">Hehe Fan</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Y">Yi Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>:
<span class="abstract-full has-text-grey-dark mathjax" id="2501.15043v2-abstract-full"> Shadow removal aims to restore the image content in shadowed regions. While deep learning-based methods have shown promising results, they still face key challenges: 1) uncontrolled removal of all shadows, or 2) controllable removal that relies heavily on precise shadow region masks. To address these issues, we introduce a novel paradigm: prompt-aware controllable shadow removal. Unlike existing approaches, our paradigm allows for targeted shadow removal from specific subjects based on user prompts (e.g., dots, lines, or subject masks). This approach eliminates the need for shadow annotations and offers flexible, user-controlled shadow removal. Specifically, we propose an end-to-end learnable model, the Prompt-Aware Controllable Shadow Removal Network (PACSRNet). PACSRNet consists of two key modules: a prompt-aware module that generates shadow masks for the specified subject based on the user prompt, and a shadow removal module that uses the shadow prior from the first module to restore the content in the shadowed regions. Additionally, we enhance the shadow removal module by incorporating feature information from the prompt-aware module through a linear operation, providing prompt-guided support for shadow removal. Recognizing that existing shadow removal datasets lack diverse user prompts, we contribute a new dataset specifically designed for prompt-based controllable shadow removal. Extensive experimental results demonstrate the effectiveness and superiority of PACSRNet. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.14851">arXiv:2501.14851</a> <span> [<a href="https://arxiv.org/pdf/2501.14851">pdf</a>, <a href="https://arxiv.org/format/2501.14851">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Logic in Computer Science">cs.LO</span> </div> </div> <p class="title is-5 mathjax"> JustLogic: A Comprehensive Benchmark for Evaluating Deductive Reasoning in Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+M+K">Michael K.
Chen</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xikun Zhang</a>, <a href="/search/cs?searchtype=author&query=Tao%2C+D">Dacheng Tao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2501.14851v1-abstract-full"> Logical reasoning is a critical component of Large Language Models (LLMs), and substantial research efforts in recent years have aimed to enhance their deductive reasoning capabilities. However, existing deductive reasoning benchmarks, which are crucial for evaluating and advancing LLMs, are inadequate due to their lack of task complexity, presence of prior knowledge as a confounder, and superficial error analysis. To address these deficiencies, we introduce JustLogic, a synthetically generated deductive reasoning benchmark designed for rigorous evaluation of LLMs. JustLogic is (i) highly complex, capable of generating a diverse range of linguistic patterns, vocabulary, and argument structures; (ii) prior knowledge independent, eliminating the advantage of models possessing prior knowledge and ensuring that only deductive reasoning is used to answer questions; and (iii) capable of in-depth error analysis on the heterogeneous effects of reasoning depth and argument form on model accuracy. Our experimental results on JustLogic reveal that most state-of-the-art (SOTA) LLMs perform significantly worse than the human average, demonstrating substantial room for model improvement. All code and data are available at https://github.com/michaelchen-lab/JustLogic </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p>
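<p class="is-size-7">A toy sketch of what "prior knowledge independent" generation can look like in practice: nonsense predicates guarantee that the answer follows only from the stated premises, and chain length gives a controllable reasoning depth. The generator below is invented for illustration and is far simpler than JustLogic's actual scheme.</p> <pre><code>
# Illustrative generator of a deduction item whose answer cannot be
# recalled from prior knowledge, only derived from the premises.
import random

def make_item(depth, seed=None):
    rng = random.Random(seed)
    preds = rng.sample(["blorfy", "trivish", "quandic", "zemious",
                        "plarn", "vortic", "snibbled", "craddled"], depth + 1)
    # depth chained implications plus one grounding fact
    premises = [f"All {a} things are {b}." for a, b in zip(preds, preds[1:])]
    premises.append(f"Zog is {preds[0]}.")
    question = f"Is Zog {preds[-1]}?"
    return premises, question, True   # the chain is valid by construction

premises, question, answer = make_item(depth=3, seed=0)
print("\n".join(premises))
print(question, answer)
</code></pre>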
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.13953">arXiv:2501.13953</a> <span> [<a href="https://arxiv.org/pdf/2501.13953">pdf</a>, <a href="https://arxiv.org/format/2501.13953">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Redundancy Principles for MLLMs Benchmarks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zicheng Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+X">Xiangyu Zhao</a>, <a href="/search/cs?searchtype=author&query=Fang%2C+X">Xinyu Fang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+C">Chunyi Li</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xiaohong Liu</a>, <a href="/search/cs?searchtype=author&query=Min%2C+X">Xiongkuo Min</a>, <a href="/search/cs?searchtype=author&query=Duan%2C+H">Haodong Duan</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Kai Chen</a>, <a href="/search/cs?searchtype=author&query=Zhai%2C+G">Guangtao Zhai</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2501.13953v1-abstract-full"> With the rapid iteration of Multi-modality Large Language Models (MLLMs) and the evolving demands of the field, the number of benchmarks produced annually has surged into the hundreds. This rapid growth has inevitably led to significant redundancy among benchmarks. Therefore, it is crucial to take a step back, critically assess the current state of redundancy, and propose targeted principles for constructing effective MLLM benchmarks. In this paper, we focus on redundancy from three key perspectives: 1) redundancy of benchmark capability dimensions, 2) redundancy in the number of test questions, and 3) cross-benchmark redundancy within specific domains. Through a comprehensive analysis of hundreds of MLLMs' performance across more than 20 benchmarks, we aim to quantitatively measure the level of redundancy present in existing MLLM evaluations, provide valuable insights to guide the future development of MLLM benchmarks, and offer strategies to refine and address redundancy issues effectively. </span> </p>
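<p class="is-size-7">One way to make the cross-benchmark redundancy notion concrete: if models' scores on two benchmarks are strongly rank-correlated, the second benchmark adds little discriminative information. The sketch below computes a Spearman correlation over made-up accuracies; it mirrors the spirit of the paper's analysis, not its exact methodology.</p> <pre><code>
# Rank-correlation probe for benchmark redundancy (illustrative data).
import numpy as np

def ranks(xs):
    # double argsort turns scores into 0..n-1 ranks (assumes no ties)
    return np.argsort(np.argsort(xs)).astype(float)

def spearman(a, b):
    ra, rb = ranks(np.asarray(a)), ranks(np.asarray(b))
    ra -= ra.mean()
    rb -= rb.mean()
    return float((ra * rb).sum() / np.sqrt((ra ** 2).sum() * (rb ** 2).sum()))

# Hypothetical accuracies of five MLLMs on two benchmarks:
bench_a = [71.2, 65.4, 80.1, 58.9, 74.3]
bench_b = [69.8, 66.0, 78.5, 60.2, 73.1]
print(f"redundancy (Spearman rho) = {spearman(bench_a, bench_b):.2f}")  # 1.00
</code></pre>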
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.13952">arXiv:2501.13952</a> <span> [<a href="https://arxiv.org/pdf/2501.13952">pdf</a>, <a href="https://arxiv.org/format/2501.13952">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> The Dual-use Dilemma in LLMs: Do Empowering Ethical Capacities Make a Degraded Utility? </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yiyi Zhang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xingyu Chen</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Kexin Chen</a>, <a href="/search/cs?searchtype=author&query=Du%2C+Y">Yuyang Du</a>, <a href="/search/cs?searchtype=author&query=Dang%2C+X">Xilin Dang</a>, <a href="/search/cs?searchtype=author&query=Heng%2C+P">Pheng-Ann Heng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2501.13952v1-abstract-full"> Recent years have witnessed extensive efforts to enhance Large Language Models (LLMs) across various domains, alongside growing attention to their ethical implications. However, a critical challenge remains largely overlooked: LLMs must balance rejecting harmful requests for safety against accommodating legitimate ones for utility. This paper presents a Direct Preference Optimization (DPO) based alignment framework that achieves better overall performance by addressing this ethical-utility trade-off, using chemical domain applications as a proof-of-concept. Our alignment pipeline starts with a GPT-assisted three-phase data generation scheme, in which we create LibraChemQA, a chemical question-answering dataset comprising 31.6k triplet instances. By incorporating an innovative balanced seed in the data generation process, our framework systematically considers both legitimate and illegitimate requests.
The framework also introduces a rephrasing mechanism for efficient data augmentation that enhances the model's chemical comprehension. We further develop a novel hybrid evaluation scheme with LLM judges for precise assessment of both safety and utility. Experimental results demonstrate our model's substantial improvements in overall performance where both safety and utility are considered: our resulting model, LibraChem, outperforms leading LLMs including Claude-3, GPT-4o, and LLaMA-3 by margins of 13.44%, 7.16%, and 7.10%, respectively, on our released benchmark. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.12386">arXiv:2501.12386</a> <span> [<a href="https://arxiv.org/pdf/2501.12386">pdf</a>, <a href="https://arxiv.org/format/2501.12386">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> InternVideo2.5: Empowering Video MLLMs with Long and Rich Context Modeling </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yi Wang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xinhao Li</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+Z">Ziang Yan</a>, <a href="/search/cs?searchtype=author&query=He%2C+Y">Yinan He</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+J">Jiashuo Yu</a>, <a href="/search/cs?searchtype=author&query=Zeng%2C+X">Xiangyu Zeng</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+C">Chenting Wang</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+C">Changlian Ma</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+H">Haian Huang</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+J">Jianfei Gao</a>, <a href="/search/cs?searchtype=author&query=Dou%2C+M">Min Dou</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Kai Chen</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+W">Wenhai Wang</a>, <a href="/search/cs?searchtype=author&query=Qiao%2C+Y">Yu Qiao</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yali Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+L">Limin Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>:
<span class="abstract-full has-text-grey-dark mathjax" id="2501.12386v2-abstract-full"> This paper aims to improve the performance of video multimodal large language models (MLLMs) via long and rich context (LRC) modeling. As a result, we develop a new version, InternVideo2.5, with a focus on enhancing the original MLLMs' ability to perceive fine-grained details and capture long-form temporal structure in videos. Specifically, our approach incorporates dense vision task annotations into MLLMs using direct preference optimization and develops compact spatiotemporal representations through adaptive hierarchical token compression. Experimental results demonstrate that this unique design of LRC greatly improves the results of video MLLMs on mainstream video understanding benchmarks (short and long), enabling the MLLM to memorize significantly longer video inputs (at least 6x longer than the original) and master specialized vision capabilities like object tracking and segmentation. Our work highlights the importance of multimodal context richness (length and fineness) in empowering MLLMs' innate abilities (focus and memory), providing new insights for future research on video MLLMs. Code and models are available at https://github.com/OpenGVLab/InternVideo/tree/main/InternVideo2.5 </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p>
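<p class="is-size-7">The "adaptive hierarchical token compression" mentioned above is a learned component of the model; the sketch below conveys only the shape of the idea, merging adjacent video tokens level by level with fixed average pooling so the language model attends over a shorter sequence. Names and sizes are invented for the example.</p> <pre><code>
# Fixed-pooling stand-in for hierarchical video-token compression.
import numpy as np

def compress_tokens(tokens, levels=2, window=2):
    """tokens: (seq_len, dim) array; halves the sequence `levels` times."""
    for _ in range(levels):
        n = (tokens.shape[0] // window) * window          # drop any remainder
        tokens = tokens[:n].reshape(-1, window, tokens.shape[1]).mean(axis=1)
    return tokens

video_tokens = np.random.randn(1024, 64)    # e.g. 256 frames x 4 tokens each
print(compress_tokens(video_tokens).shape)  # (256, 64) after two 2x merges
</code></pre>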
<p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">technical report</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.12368">arXiv:2501.12368</a> <span> [<a href="https://arxiv.org/pdf/2501.12368">pdf</a>, <a href="https://arxiv.org/format/2501.12368">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> InternLM-XComposer2.5-Reward: A Simple Yet Effective Multi-Modal Reward Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zang%2C+Y">Yuhang Zang</a>, <a href="/search/cs?searchtype=author&query=Dong%2C+X">Xiaoyi Dong</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+P">Pan Zhang</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+Y">Yuhang Cao</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Ziyu Liu</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+S">Shengyuan Ding</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+S">Shenxi Wu</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+Y">Yubo Ma</a>, <a href="/search/cs?searchtype=author&query=Duan%2C+H">Haodong Duan</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+W">Wenwei Zhang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Kai Chen</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+D">Dahua Lin</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jiaqi Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2501.12368v1-abstract-full"> Despite the promising performance of Large Vision Language Models (LVLMs) in visual understanding, they occasionally generate incorrect outputs. While reward models (RMs) with reinforcement learning or test-time scaling offer the potential for improving generation quality, a critical gap remains: publicly available multi-modal RMs for LVLMs are scarce, and the implementation details of proprietary models are often unclear. We bridge this gap with InternLM-XComposer2.5-Reward (IXC-2.5-Reward), a simple yet effective multi-modal reward model that aligns LVLMs with human preferences.
To ensure the robustness and versatility of IXC-2.5-Reward, we set up a high-quality multi-modal preference corpus spanning text, image, and video inputs across diverse domains, such as instruction following, general understanding, text-rich documents, mathematical reasoning, and video understanding. IXC-2.5-Reward achieves excellent results on the latest multi-modal reward model benchmark and shows competitive performance on text-only reward model benchmarks. We further demonstrate three key applications of IXC-2.5-Reward: (1) providing a supervisory signal for RL training, where integrating IXC-2.5-Reward with Proximal Policy Optimization (PPO) yields IXC-2.5-Chat, which shows consistent improvements in instruction following and multi-modal open-ended dialogue; (2) selecting the best response from candidate responses for test-time scaling; and (3) filtering outlier or noisy samples from existing image and video instruction tuning training data. To ensure reproducibility and facilitate further research, we have open-sourced all model weights and training recipes at https://github.com/InternLM/InternLM-XComposer </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Tech Report</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.12273">arXiv:2501.12273</a> <span> [<a href="https://arxiv.org/pdf/2501.12273">pdf</a>, <a href="https://arxiv.org/format/2501.12273">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Condor: Enhance LLM Alignment with Knowledge-Driven Data Synthesis and Refinement </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cao%2C+M">Maosong Cao</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+T">Taolin Zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+M">Mo Li</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+C">Chuyu Zhang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yunxin Liu</a>, <a href="/search/cs?searchtype=author&query=Duan%2C+H">Haodong Duan</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Songyang Zhang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Kai Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>:
<span class="abstract-full has-text-grey-dark mathjax" id="2501.12273v1-abstract-full"> The quality of Supervised Fine-Tuning (SFT) data plays a critical role in enhancing the conversational capabilities of Large Language Models (LLMs). However, as LLMs become more advanced, the availability of high-quality human-annotated SFT data has become a significant bottleneck, necessitating a greater reliance on synthetic training data. In this work, we introduce Condor, a novel two-stage synthetic data generation framework that incorporates World Knowledge Tree and Self-Reflection Refinement to produce high-quality SFT data at scale. Our experimental results demonstrate that a base model fine-tuned on only 20K Condor-generated samples achieves superior performance compared to counterparts. The additional refinement stage in Condor further enables iterative self-improvement for LLMs at various scales (up to 72B), validating the effectiveness of our approach. Furthermore, our investigation into scaling synthetic data in post-training reveals substantial unexplored potential for performance improvements, opening promising avenues for future research. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Tech Report.
Github: https://github.com/InternLM/Condor</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.11264">arXiv:2501.11264</a> <span> [<a href="https://arxiv.org/pdf/2501.11264">pdf</a>, <a href="https://arxiv.org/format/2501.11264">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Code Readability in the Age of Large Language Models: An Industrial Case Study from Atlassian </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Takerngsaksiri%2C+W">Wannita Takerngsaksiri</a>, <a href="/search/cs?searchtype=author&query=Fu%2C+M">Micheal Fu</a>, <a href="/search/cs?searchtype=author&query=Tantithamthavorn%2C+C">Chakkrit Tantithamthavorn</a>, <a href="/search/cs?searchtype=author&query=Pasuksmit%2C+J">Jirat Pasuksmit</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Kun Chen</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+M">Ming Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2501.11264v1-abstract-full"> Programmers spend a significant amount of time reading code during the software development process. This trend is amplified by the emergence of large language models (LLMs) that automatically generate code. However, little is known about the readability of LLM-generated code and whether it is still important from practitioners' perspectives in this new era. In this paper, we conduct a survey to explore practitioners' perspectives on code readability in the age of LLMs, and we investigate the readability of code generated by our LLM-based software development agent framework, HULA, by comparing it with human-written code in real-world scenarios. Overall, the findings underscore that (1) readability remains a critical aspect of software development, and (2) the readability of our LLM-generated code is comparable to that of human-written code, fostering the establishment of appropriate trust and driving the broad adoption of our LLM-powered software development platform. </span> </p>
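<p class="is-size-7">The study above measures readability through a practitioner survey; purely as an illustration, the snippet below computes crude surface proxies (line length, comment density, nesting depth) of the sort one could report alongside such a survey. These metrics are invented for the example and are not HULA's or the paper's instruments.</p> <pre><code>
# Toy surface-level readability proxies for a Python snippet.
def readability_proxies(source):
    lines = [ln for ln in source.splitlines() if ln.strip()]
    comments = [ln for ln in lines if ln.lstrip().startswith("#")]
    return {
        "avg_line_length": sum(len(ln) for ln in lines) / max(len(lines), 1),
        "comment_ratio": len(comments) / max(len(lines), 1),
        "max_indent_depth": max(((len(ln) - len(ln.lstrip())) // 4
                                 for ln in lines), default=0),
    }

snippet = "def add(a, b):\n    # sum two numbers\n    return a + b\n"
print(readability_proxies(snippet))
</code></pre>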
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, 2 figures, 5 tables, under review</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.10858">arXiv:2501.10858</a> <span> [<a href="https://arxiv.org/pdf/2501.10858">pdf</a>, <a href="https://arxiv.org/format/2501.10858">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Databases">cs.DB</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Reliable Text-to-SQL with Adaptive Abstention </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+K">Kaiwen Chen</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yueting Chen</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+X">Xiaohui Yu</a>, <a href="/search/cs?searchtype=author&query=Koudas%2C+N">Nick Koudas</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2501.10858v1-abstract-full"> Large language models (LLMs) have revolutionized natural language interfaces for databases, particularly in text-to-SQL conversion. However, current approaches often generate unreliable outputs when faced with ambiguity or insufficient context. We present Reliable Text-to-SQL (RTS), a novel framework that enhances query generation reliability by incorporating abstention and human-in-the-loop mechanisms. RTS focuses on the critical schema linking phase, which aims to identify the key database elements needed for generating SQL queries. It autonomously detects potential errors during the answer generation process and responds by either abstaining or engaging in user interaction. A vital component of RTS is the Branching Point Prediction (BPP), which applies statistical conformal techniques to the hidden layers of the LLM for schema linking, providing probabilistic guarantees on schema linking accuracy.
We validate our approach through comprehensive experiments on the BIRD benchmark, demonstrating significant improvements in robustness and reliability. Our findings highlight the potential of combining transparent-box LLMs with human-in-the-loop processes to create more robust natural language interfaces for databases. For the BIRD benchmark, our approach achieves near-perfect schema linking accuracy, autonomously involving a human when needed. Combined with query generation, we demonstrate that near-perfect schema linking and a small query generation model can almost match SOTA accuracy achieved with a model orders of magnitude larger than the one we use.
Submitted 18 January, 2025; originally announced January 2025.
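The abstract describes BPP only at a high level. As a rough illustration of the split-conformal calibration idea it builds on, the sketch below calibrates a nonconformity threshold and uses it for an abstain decision; the scores, error rate, and function names are hypothetical, not from the paper.

```python
import numpy as np

# Minimal split-conformal abstention sketch (the general technique a
# component like BPP builds on); all data here is made up.
rng = np.random.default_rng(0)

# Pretend nonconformity scores on a calibration set: lower means the
# model is more confident that its schema linking is correct.
calibration_scores = rng.normal(loc=0.3, scale=0.1, size=500)

alpha = 0.05  # target error rate

# Conformal quantile with the standard finite-sample correction.
n = len(calibration_scores)
q_level = np.ceil((n + 1) * (1 - alpha)) / n
threshold = np.quantile(calibration_scores, q_level)

def should_abstain(score: float) -> bool:
    """Abstain (defer to a human) when the new example's nonconformity
    score exceeds the calibrated threshold."""
    return score > threshold

print(should_abstain(0.25), should_abstain(0.9))  # False True
```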
arXiv:2501.10242 (https://arxiv.org/abs/2501.10242) [pdf, other]
Subjects: Quantum Physics (quant-ph); Distributed, Parallel, and Cluster Computing (cs.DC)
Title: Resource-Efficient Compilation of Distributed Quantum Circuits for Solving Large-Scale Wireless Communication Network Problems
Authors: Kuan-Cheng Chen, Felix Burt, Shang Yu, Chen-Yu Liu, Min-Hsiu Hsieh, Kin K. Leung
Abstract: Optimizing routing in Wireless Sensor Networks (WSNs) is pivotal for minimizing energy consumption and extending network lifetime. This paper introduces a resource-efficient compilation method for distributed quantum circuits tailored to address large-scale WSN routing problems. Leveraging a hybrid classical-quantum framework, we employ spectral clustering for network partitioning and the Quantum Approximate Optimization Algorithm (QAOA) for optimizing routing within manageable subgraphs. We formulate the routing problem as a Quadratic Unconstrained Binary Optimization (QUBO) problem, providing comprehensive mathematical formulations and complexity analyses. Comparative evaluations against traditional classical algorithms demonstrate significant energy savings and enhanced scalability. Our approach underscores the potential of integrating quantum computing techniques into wireless communication networks, offering a scalable and efficient solution for future network optimization challenges.
Submitted 17 January, 2025; originally announced January 2025.
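For readers unfamiliar with QUBO encodings, here is a minimal, purely classical sketch of the kind of formulation the abstract describes: binary edge-selection variables, an energy-cost objective, and penalty terms for routing constraints. The toy graph, costs, and penalty weight are invented, and brute-force enumeration stands in for QAOA sampling.

```python
import itertools
import numpy as np

# Toy QUBO routing: pick one outgoing edge per sensor node so total
# transmission energy is minimal. QAOA would sample low-energy
# bitstrings of this same QUBO; we brute-force a tiny instance.
edges = [(0, 1), (0, 2), (1, 2), (1, 3), (2, 3)]
energy = np.array([4.0, 2.0, 1.0, 5.0, 2.0])   # per-edge cost
P = 10.0                                        # constraint penalty weight

n = len(edges)
Q = np.diag(energy)

# Constraint: each non-sink node {0, 1, 2} selects exactly one outgoing
# edge. (sum_i x_i - 1)^2 expands into diagonal and off-diagonal terms.
for node in (0, 1, 2):
    out = [i for i, (u, _) in enumerate(edges) if u == node]
    for i in out:
        Q[i, i] += P * (1 - 2)  # x_i^2 = x_i, so +P - 2P on the diagonal
        for j in out:
            if i != j:
                Q[i, j] += P    # ordered pairs, coefficient 1 each
    # the constant +P is omitted: it shifts all energies equally

best = min(itertools.product((0, 1), repeat=n),
           key=lambda x: np.array(x) @ Q @ np.array(x))
print("chosen edges:", [edges[i] for i, b in enumerate(best) if b])
```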
arXiv:2501.06897 (https://arxiv.org/abs/2501.06897) [pdf, other]
Subjects: Computer Vision and Pattern Recognition (cs.CV); Robotics (cs.RO)
Title: ActiveGAMER: Active GAussian Mapping through Efficient Rendering
Authors: Liyan Chen, Huangying Zhan, Kevin Chen, Xiangyu Xu, Qingan Yan, Changjiang Cai, Yi Xu
Abstract: We introduce ActiveGAMER, an active mapping system that utilizes 3D Gaussian Splatting (3DGS) to achieve high-quality, real-time scene mapping and exploration. Unlike traditional NeRF-based methods, which are computationally demanding and restrict active mapping performance, our approach leverages the efficient rendering capabilities of 3DGS, allowing effective and efficient exploration in complex environments. The core of our system is a rendering-based information gain module that dynamically identifies the most informative viewpoints for next-best-view planning, enhancing both geometric and photometric reconstruction accuracy. ActiveGAMER also integrates a carefully balanced framework, combining coarse-to-fine exploration, post-refinement, and a global-local keyframe selection strategy to maximize reconstruction completeness and fidelity. Our system autonomously explores and reconstructs environments with state-of-the-art geometric and photometric accuracy and completeness, significantly surpassing existing approaches in both aspects. Extensive evaluations on benchmark datasets such as Replica and MP3D highlight ActiveGAMER's effectiveness in active mapping tasks.
Submitted 12 January, 2025; originally announced January 2025.
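As a toy illustration of information-gain-driven next-best-view selection (the general idea behind the module described above), the sketch below greedily picks the candidate viewpoint covering the most unexplored cells. The real system scores views by rendering the 3DGS map; the uncertainty grid here is a made-up stand-in.

```python
import numpy as np

# Toy next-best-view loop: among candidate viewpoints, move to the one
# whose view covers the most high-uncertainty (unexplored) cells.
rng = np.random.default_rng(1)
uncertainty = rng.random((20, 20))          # 1 = unexplored, 0 = mapped

def view_mask(center, radius=3, shape=(20, 20)):
    """Cells a viewpoint at `center` would observe (a simple disk)."""
    ys, xs = np.ogrid[:shape[0], :shape[1]]
    return (ys - center[0]) ** 2 + (xs - center[1]) ** 2 <= radius ** 2

candidates = [(5, 5), (5, 15), (15, 5), (15, 15)]
for step in range(3):
    gains = [uncertainty[view_mask(c)].sum() for c in candidates]
    best = candidates[int(np.argmax(gains))]
    print(f"step {step}: move to {best}, gain {max(gains):.2f}")
    uncertainty[view_mask(best)] = 0.0      # observed region is now mapped
```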
arXiv:2501.06809 (https://arxiv.org/abs/2501.06809) [pdf, other]
Subjects: Computer Vision and Pattern Recognition (cs.CV)
Title: RSRefSeg: Referring Remote Sensing Image Segmentation with Foundation Models
Authors: Keyan Chen, Jiafan Zhang, Chenyang Liu, Zhengxia Zou, Zhenwei Shi
Abstract: Referring remote sensing image segmentation is crucial for achieving fine-grained visual understanding through free-format textual input, enabling enhanced scene and object extraction in remote sensing applications. Current research primarily utilizes pre-trained language models to encode textual descriptions and align them with visual modalities, thereby facilitating the expression of relevant visual features. However, these approaches often struggle to establish robust alignments between fine-grained semantic concepts, leading to inconsistent representations across textual and visual information. To address these limitations, we introduce a referring remote sensing image segmentation foundational model, RSRefSeg. RSRefSeg leverages CLIP for visual and textual encoding, employing both global and local textual semantics as filters to generate referring-related visual activation features in the latent space. These activated features then serve as input prompts for SAM, which refines the segmentation masks through its robust visual generalization capabilities. Experimental results on the RRSIS-D dataset demonstrate that RSRefSeg outperforms existing methods, underscoring the effectiveness of foundational models in enhancing multimodal task comprehension. The code is available at https://github.com/KyanChen/RSRefSeg.
Submitted 12 January, 2025; originally announced January 2025.
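The abstract describes text-filtered visual activations serving as prompts for SAM. Below is a shape-level sketch of that data flow, with random tensors standing in for CLIP and SAM; nothing here is the paper's actual code, and all dimensions are illustrative.

```python
import torch

# Shape-level sketch: a text embedding filters visual features into a
# referring activation map, which then acts as a dense prompt for a
# mask decoder. Random tensors stand in for CLIP/SAM weights.
B, C, H, W = 1, 512, 32, 32
visual = torch.randn(B, C, H, W)        # stand-in CLIP visual features
text = torch.randn(B, C)                # stand-in CLIP text embedding

# Referring activation: cosine similarity between each pixel feature
# and the sentence embedding ("global textual semantics as a filter").
v = torch.nn.functional.normalize(visual, dim=1)
t = torch.nn.functional.normalize(text, dim=1)
activation = torch.einsum("bchw,bc->bhw", v, t)    # (B, H, W) in [-1, 1]

# A stand-in "SAM": any decoder turning features plus a dense prompt
# into mask logits. Here, a 1x1 conv over modulated features.
decoder = torch.nn.Conv2d(C, 1, kernel_size=1)
mask_logits = decoder(visual * activation.unsqueeze(1))
print(mask_logits.shape)                # torch.Size([1, 1, 32, 32])
```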
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.06809v1-abstract-full').style.display = 'none'; document.getElementById('2501.06809v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.06808">arXiv:2501.06808</a> <span> [<a href="https://arxiv.org/pdf/2501.06808">pdf</a>, <a href="https://arxiv.org/format/2501.06808">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Semantic-CD: Remote Sensing Image Semantic Change Detection towards Open-vocabulary Setting </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhu%2C+Y">Yongshuo Zhu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+L">Lu Li</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Keyan Chen</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+C">Chenyang Liu</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+F">Fugen Zhou</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+Z">Zhenwei Shi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.06808v1-abstract-short" style="display: inline;"> Remote sensing image semantic change detection is a method used to analyze remote sensing images, aiming to identify areas of change as well as categorize these changes within images of the same location taken at different times. Traditional change detection methods often face challenges in generalizing across semantic categories in practical scenarios. To address this issue, we introduce a novel… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.06808v1-abstract-full').style.display = 'inline'; document.getElementById('2501.06808v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.06808v1-abstract-full" style="display: none;"> Remote sensing image semantic change detection is a method used to analyze remote sensing images, aiming to identify areas of change as well as categorize these changes within images of the same location taken at different times. Traditional change detection methods often face challenges in generalizing across semantic categories in practical scenarios. To address this issue, we introduce a novel approach called Semantic-CD, specifically designed for semantic change detection in remote sensing images. This method incorporates the open vocabulary semantics from the vision-language foundation model, CLIP. By utilizing CLIP's extensive vocabulary knowledge, our model enhances its ability to generalize across categories and improves segmentation through fully decoupled multi-task learning, which includes both binary change detection and semantic change detection tasks. 
Semantic-CD consists of four main components: a bi-temporal CLIP visual encoder for extracting features from bi-temporal images, an open semantic prompter for creating semantic cost volume maps with open vocabulary, a binary change detection decoder for generating binary change detection masks, and a semantic change detection decoder for producing semantic labels. Experimental results on the SECOND dataset demonstrate that Semantic-CD achieves more accurate masks and reduces semantic classification errors, illustrating its effectiveness in applying semantic priors from vision-language foundation models to SCD tasks.
Submitted 12 January, 2025; originally announced January 2025.
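To make the decoupled multi-task design concrete, here is a shape-level sketch: a binary head operating on bi-temporal feature differences predicts where things changed, while a cost volume of pixel-to-class-text similarities assigns per-epoch semantic labels. All tensors and dimensions are illustrative stand-ins, not the paper's implementation.

```python
import torch

B, C, H, W, K = 1, 512, 32, 32, 6          # K = open-vocabulary classes
feat_t1 = torch.randn(B, C, H, W)           # stand-in CLIP features, epoch 1
feat_t2 = torch.randn(B, C, H, W)           # stand-in CLIP features, epoch 2
class_text = torch.randn(K, C)              # stand-in class text embeddings

# Binary branch: where did anything change?
binary_head = torch.nn.Conv2d(C, 1, 1)
binary_change = binary_head(torch.abs(feat_t2 - feat_t1))   # (B, 1, H, W)

def semantic_cost_volume(feat):
    """Similarity of every pixel feature to every class text embedding."""
    f = torch.nn.functional.normalize(feat, dim=1)
    c = torch.nn.functional.normalize(class_text, dim=1)
    return torch.einsum("bchw,kc->bkhw", f, c)               # (B, K, H, W)

# Semantic branch: what does each epoch contain?
sem_t1 = semantic_cost_volume(feat_t1).argmax(dim=1)
sem_t2 = semantic_cost_volume(feat_t2).argmax(dim=1)
print(binary_change.shape, sem_t1.shape, sem_t2.shape)
```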
arXiv:2501.06367 (https://arxiv.org/abs/2501.06367) [pdf, other]
Subjects: Cryptography and Security (cs.CR)
Title: Resilient Endurance-Aware NVM-based PUF against Learning-based Attacks
Authors: Hassan Nassar, Ming-Liang Wei, Chia-Lin Yang, Jörg Henkel, Kuan-Hsun Chen
Abstract: Physical Unclonable Functions (PUFs) based on Non-Volatile Memory (NVM) technology have emerged as a promising solution for secure authentication and cryptographic applications. By leveraging the multi-level cell (MLC) characteristic of NVMs, these PUFs can generate a wide range of unique responses, enhancing their resilience to machine learning (ML) modeling attacks. However, a significant issue with NVM-based PUFs is their endurance problem; frequent write operations lead to wear and degradation over time, reducing the reliability and lifespan of the PUF. This paper addresses these issues by offering a comprehensive model to predict and analyze the effects of endurance changes on NVM PUFs. This model provides insights into how wear impacts the PUF's quality and helps in designing more robust PUFs. Building on this model, we present a novel design for NVM PUFs that significantly improves endurance. Our design approach incorporates advanced techniques to distribute write operations more evenly and reduce stress on individual cells. The result is an NVM PUF that demonstrates a 62× improvement in endurance compared to current state-of-the-art solutions while maintaining protection against learning-based attacks.
Submitted 10 January, 2025; originally announced January 2025.
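The abstract does not give the design details; as a generic illustration of the "distribute write operations more evenly" idea it mentions, a least-worn-cell selection policy might look like the following sketch (the endurance limit and cell counts are invented).

```python
# Toy wear-leveling policy: spread writes across NVM cells by always
# picking the least worn cell that can serve a request.
ENDURANCE_LIMIT = 10_000          # writes a cell tolerates before degrading
write_counts = [0] * 16           # wear counter per MLC cell

def next_cell(candidates):
    """Choose the least-written usable cell among `candidates`."""
    usable = [c for c in candidates if write_counts[c] < ENDURANCE_LIMIT]
    if not usable:
        raise RuntimeError("all candidate cells worn out")
    return min(usable, key=lambda c: write_counts[c])

# Simulate 1,000 response-update writes restricted to cells 0-7.
for _ in range(1000):
    cell = next_cell(range(8))
    write_counts[cell] += 1

print(write_counts[:8])           # evenly spread: [125, 125, ..., 125]
```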
arXiv:2501.05249 (https://arxiv.org/abs/2501.05249) [pdf, other]
Subjects: Cryptography and Security (cs.CR); Artificial Intelligence (cs.AI)
Title: RAG-WM: An Efficient Black-Box Watermarking Approach for Retrieval-Augmented Generation of Large Language Models
Authors: Peizhuo Lv, Mengjie Sun, Hao Wang, Xiaofeng Wang, Shengzhi Zhang, Yuxuan Chen, Kai Chen, Limin Sun
Abstract: In recent years, tremendous success has been witnessed in Retrieval-Augmented Generation (RAG), widely used to enhance Large Language Models (LLMs) in domain-specific, knowledge-intensive, and privacy-sensitive tasks. However, attackers may steal those valuable RAGs and deploy or commercialize them, making it essential to detect Intellectual Property (IP) infringement. Most existing ownership protection solutions, such as watermarks, are designed for relational databases and texts. They cannot be directly applied to RAGs because relational database watermarks require white-box access to detect IP infringement, which is unrealistic for the knowledge base in RAGs. Meanwhile, post-processing by the adversary's deployed LLMs typically destroys text watermark information. To address those problems, we propose a novel black-box "knowledge watermark" approach, named RAG-WM, to detect IP infringement of RAGs. RAG-WM uses a multi-LLM interaction framework, comprising a Watermark Generator, Shadow LLM & RAG, and Watermark Discriminator, to create watermark texts based on watermark entity-relationship tuples and inject them into the target RAG. We evaluate RAG-WM across three domain-specific and two privacy-sensitive tasks on four benchmark LLMs. Experimental results show that RAG-WM effectively detects the stolen RAGs in various deployed LLMs. Furthermore, RAG-WM is robust against paraphrasing, unrelated content removal, knowledge insertion, and knowledge expansion attacks. Lastly, RAG-WM can also evade watermark detection approaches, highlighting its promising application in detecting IP infringement of RAG systems.
Submitted 9 January, 2025; originally announced January 2025.
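A heavily simplified, non-LLM rendition of the inject-then-probe workflow the abstract outlines is sketched below. The tuples, template, and detection threshold are invented; the actual system builds its Watermark Generator and Discriminator from LLMs.

```python
# Toy "knowledge watermark" workflow: render secret entity-relation
# tuples into ordinary-looking passages, inject them into a RAG
# knowledge base, then probe a suspect system and count how many
# watermark facts its answers reproduce.
wm_tuples = [("Orion Labs", "acquired", "Helix AI"),
             ("Mira Dune", "founded", "Quanta Film"),
             ("Port Veld", "hosts", "the Glass Regatta")]

def render(t):                      # Watermark-Generator stand-in
    return f"It is documented that {t[0]} {t[1]} {t[2]}."

knowledge_base = [f"ordinary document #{i}" for i in range(100)]
knowledge_base += [render(t) for t in wm_tuples]    # injection step

def probe(suspect_answers):         # Watermark-Discriminator stand-in
    """Fraction of watermark facts a suspect RAG's answers reproduce."""
    hits = sum(any(t[0] in a and t[2] in a for a in suspect_answers)
               for t in wm_tuples)
    return hits / len(wm_tuples)

answers = ["Orion Labs recently acquired Helix AI.", "No idea."]
print("stolen" if probe(answers) > 0.3 else "clean")   # -> stolen
```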
arXiv:2501.05040 (https://arxiv.org/abs/2501.05040) [pdf, other]
Subjects: Computation and Language (cs.CL)
Title: SWE-Fixer: Training Open-Source LLMs for Effective and Efficient GitHub Issue Resolution
Authors: Chengxing Xie, Bowen Li, Chang Gao, He Du, Wai Lam, Difan Zou, Kai Chen
Abstract: Large Language Models (LLMs) have demonstrated remarkable proficiency across a variety of complex tasks. One significant application of LLMs is in tackling software engineering challenges, particularly in resolving real-world tasks on GitHub by fixing code based on the issues reported by the users. However, many current approaches rely on proprietary LLMs, which limits reproducibility, accessibility, and transparency. The critical components of LLMs for addressing software engineering issues and how their capabilities can be effectively enhanced remain unclear. To address these challenges, we introduce SWE-Fixer, a novel open-source LLM designed to effectively and efficiently resolve GitHub issues. SWE-Fixer comprises two essential modules: a code file retrieval module and a code editing module. The retrieval module employs BM25 along with a lightweight LLM model to achieve coarse-to-fine file retrieval. Subsequently, the code editing module utilizes the other LLM model to generate patches for the identified files. Then, to mitigate the lack of publicly available datasets, we compile an extensive dataset that includes 110K GitHub issues along with their corresponding patches, and train the two modules of SWE-Fixer separately. We assess our approach on the SWE-Bench Lite and Verified benchmarks, achieving state-of-the-art performance among open-source models with scores of 23.3% and 30.2%, respectively.
These outcomes highlight the efficacy of our approach. We will make our model, dataset, and code publicly available at https://github.com/InternLM/SWE-Fixer.
Submitted 9 January, 2025; originally announced January 2025.
Comments: Our code, data, and model will be released at https://github.com/InternLM/SWE-Fixer
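The coarse half of a BM25-then-LLM retrieval cascade like the one described above can be sketched in a few lines. The repository contents and query below are invented, and the fine-grained LLM reranker is omitted; this uses the rank-bm25 package rather than anything from the paper's codebase.

```python
from rank_bm25 import BM25Okapi   # pip install rank-bm25

# Coarse retrieval stage: BM25 narrows a repository down to a few
# candidate files; a lightweight LLM (not shown) would rerank them.
repo_files = {
    "auth/session.py": "def login(user): validate token session expiry",
    "db/models.py":    "class User: email password hash migration",
    "api/routes.py":   "login route returns 500 when session expired",
}

paths = list(repo_files)
tokenized = [repo_files[p].lower().split() for p in paths]
bm25 = BM25Okapi(tokenized)

issue = "login returns 500 after session expiry".lower().split()
scores = bm25.get_scores(issue)

# Top-k candidates to hand to the fine-grained reranker.
top_k = sorted(zip(paths, scores), key=lambda s: -s[1])[:2]
print(top_k)
```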
arXiv:2501.04845 (https://arxiv.org/abs/2501.04845) [pdf, ps, other]
Subjects: Instrumentation and Detectors (physics.ins-det); Machine Learning (cs.LG); High Energy Physics - Experiment (hep-ex); Nuclear Experiment (nucl-ex)
Title: Intelligent experiments through real-time AI: Fast Data Processing and Autonomous Detector Control for sPHENIX and future EIC detectors
Authors: J. Kvapil, G. Borca-Tasciuc, H. Bossi, K. Chen, Y. Chen, Y. Corrales Morales, H. Da Costa, C. Da Silva, C. Dean, J. Durham, S. Fu, C. Hao, P. Harris, O. Hen, H. Jheng, Y. Lee, P. Li, X. Li, Y. Lin, M. X. Liu, V. Loncar, J. P. Mitrevski, A. Olvera, M. L. Purschke, J. S. Renck, et al. (8 additional authors not shown)
Abstract: This R&D project, initiated by the DOE Nuclear Physics AI-Machine Learning initiative in 2022, leverages AI to address data processing challenges in high-energy nuclear experiments (RHIC, LHC, and future EIC). Our focus is on developing a demonstrator for real-time processing of high-rate data streams from sPHENIX experiment tracking detectors. The limitations of a 15 kHz maximum trigger rate imposed by the calorimeters can be negated by intelligent use of streaming technology in the tracking system. The approach efficiently identifies low momentum rare heavy flavor events in high-rate p+p collisions (3 MHz), using a Graph Neural Network (GNN) and High Level Synthesis for Machine Learning (hls4ml). Success at sPHENIX promises immediate benefits, minimizing resources and accelerating the heavy-flavor measurements. The approach is transferable to other fields. For the EIC, we develop a DIS-electron tagger using Artificial Intelligence - Machine Learning (AI-ML) algorithms for real-time identification, showcasing the transformative potential of AI and FPGA technologies in real-time data processing pipelines for high-energy nuclear and particle experiments.
Submitted 8 January, 2025; originally announced January 2025.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">proceedings for 42nd International Conference on High Energy Physics (ICHEP2024), 18-24 July 2024, Prague, Czech Republic</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Report number:</span> LA-UR-24-30394 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.04477">arXiv:2501.04477</a> <span> [<a href="https://arxiv.org/pdf/2501.04477">pdf</a>, <a href="https://arxiv.org/format/2501.04477">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Rethinking High-speed Image Reconstruction Framework with Spike Camera </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+K">Kang Chen</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Y">Yajing Zheng</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+T">Tiejun Huang</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+Z">Zhaofei Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.04477v1-abstract-short" style="display: inline;"> Spike cameras, as innovative neuromorphic devices, generate continuous spike streams to capture high-speed scenes with lower bandwidth and higher dynamic range than traditional RGB cameras. However, reconstructing high-quality images from the spike input under low-light conditions remains challenging. Conventional learning-based methods often rely on the synthetic dataset as the supervision for tr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.04477v1-abstract-full').style.display = 'inline'; document.getElementById('2501.04477v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.04477v1-abstract-full" style="display: none;"> Spike cameras, as innovative neuromorphic devices, generate continuous spike streams to capture high-speed scenes with lower bandwidth and higher dynamic range than traditional RGB cameras. However, reconstructing high-quality images from the spike input under low-light conditions remains challenging. Conventional learning-based methods often rely on the synthetic dataset as the supervision for training. Still, these approaches falter when dealing with noisy spikes fired under the low-light environment, leading to further performance degradation in the real-world dataset. This phenomenon is primarily due to inadequate noise modelling and the domain gap between synthetic and real datasets, resulting in recovered images with unclear textures, excessive noise, and diminished brightness. To address these challenges, we introduce a novel spike-to-image reconstruction framework SpikeCLIP that goes beyond traditional training paradigms. Leveraging the CLIP model's powerful capability to align text and images, we incorporate the textual description of the captured scene and unpaired high-quality datasets as the supervision. 
Our experiments on real-world low-light datasets U-CALTECH and U-CIFAR demonstrate that SpikeCLIP significantly enhances texture details and the luminance balance of recovered images. Furthermore, the reconstructed images are well-aligned with the broader visual features needed for downstream tasks, ensuring more robust and versatile performance in challenging environments.
Submitted 8 January, 2025; originally announced January 2025.
Comments: Accepted by AAAI2025

arXiv:2501.04005 (https://arxiv.org/abs/2501.04005) [pdf, other]
Subjects: Computer Vision and Pattern Recognition (cs.CV); Machine Learning (cs.LG); Robotics (cs.RO)
Title: LargeAD: Large-Scale Cross-Sensor Data Pretraining for Autonomous Driving
Authors: Lingdong Kong, Xiang Xu, Youquan Liu, Jun Cen, Runnan Chen, Wenwei Zhang, Liang Pan, Kai Chen, Ziwei Liu
Abstract: Recent advancements in vision foundation models (VFMs) have revolutionized visual perception in 2D, yet their potential for 3D scene understanding, particularly in autonomous driving applications, remains underexplored. In this paper, we introduce LargeAD, a versatile and scalable framework designed for large-scale 3D pretraining across diverse real-world driving datasets.
Our framework leverages VFMs to extract semantically rich superpixels from 2D images, which are aligned with LiDAR point clouds to generate high-quality contrastive samples. This alignment facilitates cross-modal representation learning, enhancing the semantic consistency between 2D and 3D data. We introduce several key innovations: i) VFM-driven superpixel generation for detailed semantic representation, ii) a VFM-assisted contrastive learning strategy to align multimodal features, iii) superpoint temporal consistency to maintain stable representations across time, and iv) multi-source data pretraining to generalize across various LiDAR configurations. Our approach delivers significant performance improvements over state-of-the-art methods in both linear probing and fine-tuning tasks for both LiDAR-based segmentation and object detection. Extensive experiments on eleven large-scale multi-modal datasets highlight our superior performance, demonstrating its adaptability, efficiency, and robustness in real-world autonomous driving scenarios.
Submitted 7 January, 2025; originally announced January 2025.
Comments: Preprint; 16 pages, 7 figures, 8 tables; Project Page at https://ldkong.com/LargeAD
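Innovation ii) above is a contrastive alignment between 2D superpixel features and pooled LiDAR point features. A minimal InfoNCE sketch of that kind of objective follows, with random features standing in for both branches; the dimensions and temperature are illustrative, not the paper's settings.

```python
import torch
import torch.nn.functional as F

# Sketch of a 2D-to-3D contrastive objective: each superpixel's image
# feature should match the pooled feature of the LiDAR points projected
# into it (positives) and repel all other superpixels (negatives).
N, D, tau = 64, 128, 0.07                 # superpixels, feat dim, temperature
pixel_feats = F.normalize(torch.randn(N, D), dim=1)   # 2D (VFM) branch
point_feats = F.normalize(torch.randn(N, D), dim=1)   # pooled 3D branch

logits = pixel_feats @ point_feats.t() / tau          # (N, N) similarities
targets = torch.arange(N)                             # diagonal = positives
loss = F.cross_entropy(logits, targets)               # InfoNCE
print(float(loss))
```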
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Preprint; 16 pages, 7 figures, 8 tables; Project Page at https://ldkong.com/LargeAD</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.03905">arXiv:2501.03905</a> <span> [<a href="https://arxiv.org/pdf/2501.03905">pdf</a>, <a href="https://arxiv.org/format/2501.03905">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> mFabric: An Efficient and Scalable Fabric for Mixture-of-Experts Training </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liao%2C+X">Xudong Liao</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+Y">Yijun Sun</a>, <a href="/search/cs?searchtype=author&query=Tian%2C+H">Han Tian</a>, <a href="/search/cs?searchtype=author&query=Wan%2C+X">Xinchen Wan</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+Y">Yilun Jin</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zilong Wang</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+Z">Zhenghang Ren</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+X">Xinyang Huang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+W">Wenxue Li</a>, <a href="/search/cs?searchtype=author&query=Tse%2C+K+F">Kin Fai Tse</a>, <a href="/search/cs?searchtype=author&query=Zhong%2C+Z">Zhizhen Zhong</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+G">Guyue Liu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Ying Zhang</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+X">Xiaofeng Ye</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yiming Zhang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Kai Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.03905v1-abstract-short" style="display: inline;"> Mixture-of-Expert (MoE) models outperform conventional models by selectively activating different subnets, named \emph{experts}, on a per-token basis. This gated computation generates dynamic communications that cannot be determined beforehand, challenging the existing GPU interconnects that remain \emph{static} during the distributed training process. In this paper, we advocate for a first-of-its… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.03905v1-abstract-full').style.display = 'inline'; document.getElementById('2501.03905v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.03905v1-abstract-full" style="display: none;"> Mixture-of-Expert (MoE) models outperform conventional models by selectively activating different subnets, named \emph{experts}, on a per-token basis. This gated computation generates dynamic communications that cannot be determined beforehand, challenging the existing GPU interconnects that remain \emph{static} during the distributed training process. 
In this paper, we advocate for a first-of-its-kind system, called mFabric, that unlocks topology reconfiguration during distributed MoE training. Towards this vision, we first perform a production measurement study and show that the MoE dynamic communication pattern has strong locality, alleviating the requirement of global reconfiguration. Based on this, we design and implement a regionally reconfigurable high-bandwidth domain on top of existing electrical interconnects using optical circuit switching (OCS), achieving scalability while maintaining rapid adaptability. We have built a fully functional mFabric prototype with commodity hardware and a customized collective communication runtime that trains state-of-the-art MoE models with in-training topology reconfiguration across 32 A100 GPUs. Large-scale packet-level simulations show that mFabric delivers performance comparable to the non-blocking fat-tree fabric while boosting the training cost efficiency (e.g., performance per dollar) of four representative MoE models by 1.2×-1.5× and 1.9×-2.3× at 100 Gbps and 400 Gbps link bandwidths, respectively.
Submitted 7 January, 2025; originally announced January 2025.
Comments: Corresponding authors: zhizhenz@mit.edu (Z. Zhong), kaichen@cse.ust.hk (K. Chen)

arXiv:2501.03675 (https://arxiv.org/abs/2501.03675) [pdf, other]
Subjects: Computer Vision and Pattern Recognition (cs.CV)
Title: SMIR: Efficient Synthetic Data Pipeline To Improve Multi-Image Reasoning
Authors: Andrew Li, Rahul Thapa, Rahul Chalamala, Qingyang Wu, Kezhen Chen, James Zou
Abstract: Vision-Language Models (VLMs) have shown strong performance in understanding single images, aided by numerous high-quality instruction datasets.
However, multi-image reasoning tasks are still under-explored in the open-source community due to two main challenges: (1) scaling datasets with multiple correlated images and complex reasoning instructions is resource-intensive and maintaining quality is difficult, and (2) there is a lack of robust evaluation benchmarks for multi-image tasks. To address these issues, we introduce SMIR, an efficient synthetic data-generation pipeline for multi-image reasoning, and a high-quality dataset generated using this pipeline. Our pipeline efficiently extracts highly correlated images using multimodal embeddings, combining visual and descriptive information, and leverages open-source LLMs to generate quality instructions. Using this pipeline, we generated 160K synthetic training samples, offering a cost-effective alternative to expensive closed-source solutions. Additionally, we present SMIR-BENCH, a novel multi-image reasoning evaluation benchmark comprising 200 diverse examples across 7 complex multi-image reasoning tasks. SMIR-BENCH is multi-turn and utilizes a VLM judge to evaluate free-form responses, providing a comprehensive assessment of model expressiveness and reasoning capability across modalities. We demonstrate the effectiveness of the SMIR dataset by fine-tuning several open-source VLMs and evaluating their performance on SMIR-BENCH. Our results show that models trained on our dataset outperform baseline models in multi-image reasoning tasks by up to 8%, with a much more scalable data pipeline.
Submitted 7 January, 2025; originally announced January 2025.
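The "extract highly correlated images using multimodal embeddings" step could look roughly like the cosine-similarity grouping sketch below; the embeddings, threshold, and group size are invented stand-ins for what a CLIP-like encoder plus caption embeddings would produce.

```python
import numpy as np

# Group correlated images by embedding similarity into candidate
# multi-image samples; each group would then be handed to an
# open-source LLM to draft a multi-image reasoning instruction.
rng = np.random.default_rng(2)
emb = rng.normal(size=(10, 64))
emb /= np.linalg.norm(emb, axis=1, keepdims=True)

sim = emb @ emb.T                       # pairwise cosine similarity
np.fill_diagonal(sim, -1.0)             # ignore self matches

def correlated_group(anchor, k=3):
    """Anchor image plus its k nearest neighbours above a threshold."""
    nbrs = np.argsort(sim[anchor])[::-1][:k]
    return [anchor] + [int(j) for j in nbrs if sim[anchor, j] > 0.0]

print(correlated_group(0))
```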