CINXE.COM
Search | arXiv e-print repository
<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script 
src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. 
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" 
role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 1,900 results for author: <span class="mathjax">Wang, T</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Wang%2C+T">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Wang, T"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Wang%2C+T&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option 
value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Wang, T"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Wang%2C+T&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Wang%2C+T&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Wang%2C+T&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Wang%2C+T&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Wang%2C+T&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Wang%2C+T&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">…</span></li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.13805">arXiv:2502.13805</a> <span> [<a href="https://arxiv.org/pdf/2502.13805">pdf</a>, <a 
href="https://arxiv.org/format/2502.13805">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Databases">cs.DB</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> AnDB: Breaking Boundaries with an AI-Native Database for Universal Semantic Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+T">Tianqing Wang</a>, <a href="/search/cs?searchtype=author&query=Xue%2C+X">Xun Xue</a>, <a href="/search/cs?searchtype=author&query=Li%2C+G">Guoliang Li</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yong Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.13805v1-abstract-short" style="display: inline;"> In this demonstration, we present AnDB, an AI-native database that supports traditional OLTP workloads and innovative AI-driven tasks, enabling unified semantic analysis across structured and unstructured data. While structured data analytics is mature, challenges remain in bridging the semantic gap between user queries and unstructured data. 
AnDB addresses these issues by leveraging cutting-edge… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13805v1-abstract-full').style.display = 'inline'; document.getElementById('2502.13805v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.13805v1-abstract-full" style="display: none;"> In this demonstration, we present AnDB, an AI-native database that supports traditional OLTP workloads and innovative AI-driven tasks, enabling unified semantic analysis across structured and unstructured data. While structured data analytics is mature, challenges remain in bridging the semantic gap between user queries and unstructured data. AnDB addresses these issues by leveraging cutting-edge AI-native technologies, allowing users to perform semantic queries using intuitive SQL-like statements without requiring AI expertise. This approach eliminates the ambiguity of traditional text-to-SQL systems and provides a seamless end-to-end optimization for analyzing all data types. AnDB automates query processing by generating multiple execution plans and selecting the optimal one through its optimizer, which balances accuracy, execution time, and financial cost based on user policies and internal optimizing mechanisms. AnDB future-proofs data management infrastructure, empowering users to effectively and efficiently harness the full potential of all kinds of data without starting from scratch. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13805v1-abstract-full').style.display = 'none'; document.getElementById('2502.13805v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">4 pages, 5 figures, conference</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12794">arXiv:2502.12794</a> <span> [<a href="https://arxiv.org/pdf/2502.12794">pdf</a>, <a href="https://arxiv.org/format/2502.12794">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> RAPID: Retrieval Augmented Training of Differentially Private Diffusion Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jiang%2C+T">Tanqiu Jiang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+C">Changjiang Li</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+F">Fenglong Ma</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+T">Ting Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.12794v1-abstract-short" style="display: inline;"> Differentially private diffusion models (DPDMs) harness the remarkable generative capabilities of diffusion models while enforcing differential privacy (DP) for sensitive data. However, existing DPDM training approaches often suffer from significant utility loss, large memory footprint, and expensive inference cost, impeding their practical uses. 
To overcome such limitations, we present RAPID: Ret… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12794v1-abstract-full').style.display = 'inline'; document.getElementById('2502.12794v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.12794v1-abstract-full" style="display: none;"> Differentially private diffusion models (DPDMs) harness the remarkable generative capabilities of diffusion models while enforcing differential privacy (DP) for sensitive data. However, existing DPDM training approaches often suffer from significant utility loss, large memory footprint, and expensive inference cost, impeding their practical uses. To overcome such limitations, we present RAPID: Retrieval Augmented PrIvate Diffusion model, a novel approach that integrates retrieval augmented generation (RAG) into DPDM training. Specifically, RAPID leverages available public data to build a knowledge base of sample trajectories; when training the diffusion model on private data, RAPID computes the early sampling steps as queries, retrieves similar trajectories from the knowledge base as surrogates, and focuses on training the later sampling steps in a differentially private manner. Extensive evaluation using benchmark datasets and models demonstrates that, with the same privacy guarantee, RAPID significantly outperforms state-of-the-art approaches by large margins in generative quality, memory footprint, and inference cost, suggesting that retrieval-augmented DP training represents a promising direction for developing future privacy-preserving generative models. 
The code is available at: https://github.com/TanqiuJiang/RAPID <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12794v1-abstract-full').style.display = 'none'; document.getElementById('2502.12794v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published in ICLR 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12530">arXiv:2502.12530</a> <span> [<a href="https://arxiv.org/pdf/2502.12530">pdf</a>, <a href="https://arxiv.org/format/2502.12530">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Policy-to-Language: Train LLMs to Explain Decisions with Flow-Matching Generated Rewards </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yang%2C+X">Xinyi Yang</a>, <a href="/search/cs?searchtype=author&query=Zeng%2C+L">Liang Zeng</a>, <a href="/search/cs?searchtype=author&query=Dong%2C+H">Heng Dong</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+C">Chao Yu</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+X">Xiaoran Wu</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+H">Huazhong Yang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yu Wang</a>, <a 
href="/search/cs?searchtype=author&query=Tambe%2C+M">Milind Tambe</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+T">Tonghan Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.12530v1-abstract-short" style="display: inline;"> As humans increasingly share environments with diverse agents powered by RL, LLMs, and beyond, the ability to explain their policies in natural language will be vital for reliable coexistence. In this paper, we build a model-agnostic explanation generator based on an LLM. The technical novelty is that the rewards for training this LLM are generated by a generative flow matching model. This model h… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12530v1-abstract-full').style.display = 'inline'; document.getElementById('2502.12530v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.12530v1-abstract-full" style="display: none;"> As humans increasingly share environments with diverse agents powered by RL, LLMs, and beyond, the ability to explain their policies in natural language will be vital for reliable coexistence. In this paper, we build a model-agnostic explanation generator based on an LLM. The technical novelty is that the rewards for training this LLM are generated by a generative flow matching model. This model has a specially designed structure with a hidden layer merged with an LLM to harness the linguistic cues of explanations into generating appropriate rewards. Experiments on both RL and LLM tasks demonstrate that our method can generate dense and effective rewards while saving on expensive human feedback; it thus enables effective explanations and even improves the accuracy of the decisions in original tasks. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12530v1-abstract-full').style.display = 'none'; document.getElementById('2502.12530v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11946">arXiv:2502.11946</a> <span> [<a href="https://arxiv.org/pdf/2502.11946">pdf</a>, <a href="https://arxiv.org/format/2502.11946">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Step-Audio: Unified Understanding and Generation in Intelligent Speech Interaction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huang%2C+A">Ailin Huang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+B">Boyong Wu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+B">Bruce Wang</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+C">Chao Yan</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+C">Chen Hu</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+C">Chengli Feng</a>, <a 
href="/search/cs?searchtype=author&query=Tian%2C+F">Fei Tian</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+F">Feiyu Shen</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Jingbei Li</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+M">Mingrui Chen</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+P">Peng Liu</a>, <a href="/search/cs?searchtype=author&query=Miao%2C+R">Ruihang Miao</a>, <a href="/search/cs?searchtype=author&query=You%2C+W">Wang You</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xi Chen</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+X">Xuerui Yang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yechang Huang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yuxiang Zhang</a>, <a href="/search/cs?searchtype=author&query=Gong%2C+Z">Zheng Gong</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zixin Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+H">Hongyu Zhou</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+J">Jianjian Sun</a>, <a href="/search/cs?searchtype=author&query=Li%2C+B">Brian Li</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+C">Chengting Feng</a>, <a href="/search/cs?searchtype=author&query=Wan%2C+C">Changyi Wan</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+H">Hanpeng Hu</a> , et al. (120 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.11946v2-abstract-short" style="display: inline;"> Real-time speech interaction, serving as a fundamental interface for human-machine collaboration, holds immense potential. However, current open-source models face limitations such as high costs in voice data collection, weakness in dynamic control, and limited intelligence. 
To address these challenges, this paper introduces Step-Audio, the first production-ready open-source solution. Key contribu… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11946v2-abstract-full').style.display = 'inline'; document.getElementById('2502.11946v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11946v2-abstract-full" style="display: none;"> Real-time speech interaction, serving as a fundamental interface for human-machine collaboration, holds immense potential. However, current open-source models face limitations such as high costs in voice data collection, weakness in dynamic control, and limited intelligence. To address these challenges, this paper introduces Step-Audio, the first production-ready open-source solution. Key contributions include: 1) a 130B-parameter unified speech-text multi-modal model that achieves unified understanding and generation, with the Step-Audio-Chat version open-sourced; 2) a generative speech data engine that establishes an affordable voice cloning framework and produces the open-sourced lightweight Step-Audio-TTS-3B model through distillation; 3) an instruction-driven fine control system enabling dynamic adjustments across dialects, emotions, singing, and RAP; 4) an enhanced cognitive architecture augmented with tool calling and role-playing abilities to manage complex tasks effectively. Based on our new StepEval-Audio-360 evaluation benchmark, Step-Audio achieves state-of-the-art performance in human evaluations, especially in terms of instruction following. On open-source benchmarks like LLaMA Question, shows 9.3% average performance improvement, demonstrating our commitment to advancing the development of open-source multi-modal language technologies. Our code and models are available at https://github.com/stepfun-ai/Step-Audio. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11946v2-abstract-full').style.display = 'none'; document.getElementById('2502.11946v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11484">arXiv:2502.11484</a> <span> [<a href="https://arxiv.org/pdf/2502.11484">pdf</a>, <a href="https://arxiv.org/format/2502.11484">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Dictionary-Learning-Based Data Pruning for System Identification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+T">Tingna Wang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Sikai Zhang</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+L">Limin Sun</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.11484v1-abstract-short" style="display: inline;"> System identification is normally involved in augmenting time series data by time shifting and nonlinearisation (via polynomial basis), which introduce redundancy both feature-wise and sample-wise. 
Many research works focus on reducing redundancy feature-wise, while less attention is paid to sample-wise redundancy. This paper proposes a novel data pruning method, called (mini-batch) FastCan, to re… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11484v1-abstract-full').style.display = 'inline'; document.getElementById('2502.11484v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11484v1-abstract-full" style="display: none;"> System identification is normally involved in augmenting time series data by time shifting and nonlinearisation (via polynomial basis), which introduce redundancy both feature-wise and sample-wise. Many research works focus on reducing redundancy feature-wise, while less attention is paid to sample-wise redundancy. This paper proposes a novel data pruning method, called (mini-batch) FastCan, to reduce sample-wise redundancy based on dictionary learning. Time series data is represented by some representative samples, called atoms, via dictionary learning. The useful samples are selected based on their correlation with the atoms. The method is tested on one simulated dataset and two benchmark datasets. The R-squared between the coefficients of models trained on the full and the coefficients of models trained on pruned datasets is adopted to evaluate the performance of data pruning methods. It is found that the proposed method significantly outperforms the random pruning method. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11484v1-abstract-full').style.display = 'none'; document.getElementById('2502.11484v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11430">arXiv:2502.11430</a> <span> [<a href="https://arxiv.org/pdf/2502.11430">pdf</a>, <a href="https://arxiv.org/format/2502.11430">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> "An Image of Ourselves in Our Minds": How College-educated Online Dating Users Construct Profiles for Effective Self Presentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+F">Fan Zhang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yun Chen</a>, <a href="/search/cs?searchtype=author&query=Zeng%2C+X">Xiaoke Zeng</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+T">Tianqi Wang</a>, <a href="/search/cs?searchtype=author&query=Ling%2C+L">Long Ling</a>, <a href="/search/cs?searchtype=author&query=LC%2C+R">RAY LC</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.11430v1-abstract-short" style="display: inline;"> Online dating is frequently used by individuals looking for potential relationships and intimate connections. 
Central to dating apps is the creation and refinement of a dating profile, which represents the way individuals desire to present themselves to potential mates, while hiding information they do not care to share. To investigate the way frequent users of dating apps construct their online p… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11430v1-abstract-full').style.display = 'inline'; document.getElementById('2502.11430v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11430v1-abstract-full" style="display: none;"> Online dating is frequently used by individuals looking for potential relationships and intimate connections. Central to dating apps is the creation and refinement of a dating profile, which represents the way individuals desire to present themselves to potential mates, while hiding information they do not care to share. To investigate the way frequent users of dating apps construct their online profiles and perceive the effectiveness of strategies taken in making profiles, we conducted semi-structured interviews with 20 experienced users who are Chinese college-educated young adults and uncovered the processes and rationales by which they make profiles for online dating, particularly in selecting images for inclusion. We found that participants used idealized photos that exaggerated their positive personality traits, sometimes traits that they do not possess but perceive others to desire, and sometimes even traits they wish they had possessed. Users also strategically used photos that show personality and habits without showing themselves, and often hid certain identifying information to reduce privacy risks. 
This analysis signals potential factors that are key in building online dating profiles, providing design implications for systems that limit the use of inaccurate information while still promoting self-expression in relationship platforms. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11430v1-abstract-full').style.display = 'none'; document.getElementById('2502.11430v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">30 pages, 6 figures, to be published in CSCW 2025</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> J.4 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11161">arXiv:2502.11161</a> <span> [<a href="https://arxiv.org/pdf/2502.11161">pdf</a>, <a href="https://arxiv.org/format/2502.11161">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> BFA: Best-Feature-Aware Fusion for Multi-View Fine-grained Manipulation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lan%2C+Z">Zihan Lan</a>, <a href="/search/cs?searchtype=author&query=Mao%2C+W">Weixin Mao</a>, <a href="/search/cs?searchtype=author&query=Li%2C+H">Haosheng Li</a>, <a 
href="/search/cs?searchtype=author&query=Wang%2C+L">Le Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+T">Tiancai Wang</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+H">Haoqiang Fan</a>, <a href="/search/cs?searchtype=author&query=Yoshie%2C+O">Osamu Yoshie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.11161v2-abstract-short" style="display: inline;"> In real-world scenarios, multi-view cameras are typically employed for fine-grained manipulation tasks. Existing approaches (e.g., ACT) tend to treat multi-view features equally and directly concatenate them for policy learning. However, it will introduce redundant visual information and bring higher computational costs, leading to ineffective manipulation. For a fine-grained manipulation task, it… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11161v2-abstract-full').style.display = 'inline'; document.getElementById('2502.11161v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11161v2-abstract-full" style="display: none;"> In real-world scenarios, multi-view cameras are typically employed for fine-grained manipulation tasks. Existing approaches (e.g., ACT) tend to treat multi-view features equally and directly concatenate them for policy learning. However, it will introduce redundant visual information and bring higher computational costs, leading to ineffective manipulation. For a fine-grained manipulation task, it tends to involve multiple stages while the most contributed view for different stages is varied over time. In this paper, we propose a plug-and-play best-feature-aware (BFA) fusion strategy for multi-view manipulation tasks, which is adaptable to various policies. 
Built upon the visual backbone of the policy network, we design a lightweight network to predict the importance score of each view. Based on the predicted importance scores, the reweighted multi-view features are subsequently fused and input into the end-to-end policy network, enabling seamless integration. Notably, our method demonstrates outstanding performance in fine-grained manipulations. Experimental results show that our approach outperforms multiple baselines by 22-46% success rate on different tasks. Our work provides new insights and inspiration for tackling key challenges in fine-grained manipulations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11161v2-abstract-full').style.display = 'none'; document.getElementById('2502.11161v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 4 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11027">arXiv:2502.11027</a> <span> [<a href="https://arxiv.org/pdf/2502.11027">pdf</a>, <a href="https://arxiv.org/format/2502.11027">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Diversified Sampling Improves Scaling LLM inference </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+T">Tianchun Wang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zichuan Liu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yuanzhou Chen</a>, <a href="/search/cs?searchtype=author&query=Light%2C+J">Jonathan Light</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+H">Haifeng Chen</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xiang Zhang</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+W">Wei Cheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.11027v1-abstract-short" style="display: inline;"> While increasing training compute has significantly improved the performance of large language models (LLMs), similar gains have not been observed when scaling inference compute. We hypothesize that the primary issue lies in the uniformity of LLM outputs, which leads to inefficient sampling as models repeatedly generate similar but inaccurate responses. 
Motivated by an intriguing relationship betw… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11027v1-abstract-full').style.display = 'inline'; document.getElementById('2502.11027v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11027v1-abstract-full" style="display: none;"> While increasing training compute has significantly improved the performance of large language models (LLMs), similar gains have not been observed when scaling inference compute. We hypothesize that the primary issue lies in the uniformity of LLM outputs, which leads to inefficient sampling as models repeatedly generate similar but inaccurate responses. Motivated by an intriguing relationship between solution accuracy (Pass@10) and response diversity, we propose DivSampling — a novel and versatile sampling technique designed to enhance the diversity of candidate solutions by introducing prompt perturbations. DivSampling incorporates two categories of perturbations: task-agnostic approaches, which are general and not tailored to any specific task, and task-specific approaches, which are customized based on task content. Our theoretical analysis demonstrates that, under mild assumptions, the error rates of responses generated from diverse prompts are significantly lower compared to those produced by stationary prompts. Comprehensive evaluations across various tasks — including reasoning, mathematics, and code generation — highlight the effectiveness of DivSampling in improving solution accuracy. This scalable and efficient approach offers a new perspective on optimizing test-time inference, addressing limitations in current sampling strategies. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11027v1-abstract-full').style.display = 'none'; document.getElementById('2502.11027v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">19 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.10993">arXiv:2502.10993</a> <span> [<a href="https://arxiv.org/pdf/2502.10993">pdf</a>, <a href="https://arxiv.org/format/2502.10993">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> RoseRAG: Robust Retrieval-augmented Generation with Small-scale LLMs via Margin-aware Preference Optimization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+T">Tianci Liu</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+H">Haoxiang Jiang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+T">Tianze Wang</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+R">Ran Xu</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+Y">Yue Yu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Linjun Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+T">Tuo Zhao</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Haoyu Wang</a> </p> <p class="abstract mathjax"> <span 
class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.10993v1-abstract-short" style="display: inline;"> Large language models (LLMs) have achieved impressive performance but face high computational costs and latency, limiting their deployment in resource-constrained settings. In contrast, small-scale LLMs (SLMs) are more efficient yet struggle to capture evolving real-world knowledge. Retrieval-augmented generation (RAG) helps by integrating external knowledge, but imperfect retrieval can introduce… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10993v1-abstract-full').style.display = 'inline'; document.getElementById('2502.10993v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.10993v1-abstract-full" style="display: none;"> Large language models (LLMs) have achieved impressive performance but face high computational costs and latency, limiting their deployment in resource-constrained settings. In contrast, small-scale LLMs (SLMs) are more efficient yet struggle to capture evolving real-world knowledge. Retrieval-augmented generation (RAG) helps by integrating external knowledge, but imperfect retrieval can introduce distracting noise that misleads SLMs. We propose RoseRAG, a robust RAG framework for SLMs via Margin-aware Preference Optimization. RoseRAG employs multi-turn prompting for detailed reasoning, rejection sampling for high-quality explanations, and contrastive preference selection to refine responses by maximizing the likelihood gap between preferred and non-preferred outputs. By integrating these components into a margin-aware optimization process, RoseRAG robustly enhances the accuracy and reliability of SLMs for RAG applications. 
Extensive experiments on three open-domain question answering benchmarks indicate that our innovative RoseRAG surpasses state-of-the-art baselines significantly. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10993v1-abstract-full').style.display = 'none'; document.getElementById('2502.10993v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.10844">arXiv:2502.10844</a> <span> [<a href="https://arxiv.org/pdf/2502.10844">pdf</a>, <a href="https://arxiv.org/format/2502.10844">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Be Friendly, Not Friends: How LLM Sycophancy Shapes User Trust </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sun%2C+Y">Yuan Sun</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+T">Ting Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.10844v2-abstract-short" style="display: inline;"> Recent studies have revealed that large language model (LLM)-powered conversational agents often exhibit `sycophancy', a tendency to adapt their responses to align with user perspectives, even at the expense of factual accuracy. However, users' perceptions of LLM sycophancy and its interplay with other anthropomorphic features (e.g., friendliness) in shaping user trust remains understudied. 
To bri… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10844v2-abstract-full').style.display = 'inline'; document.getElementById('2502.10844v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.10844v2-abstract-full" style="display: none;"> Recent studies have revealed that large language model (LLM)-powered conversational agents often exhibit `sycophancy', a tendency to adapt their responses to align with user perspectives, even at the expense of factual accuracy. However, users' perceptions of LLM sycophancy and its interplay with other anthropomorphic features (e.g., friendliness) in shaping user trust remains understudied. To bridge this gap, we conducted a 2 (Sycophancy: presence vs. absence) x 2 (Friendliness: high vs. low) between-subjects experiment (N = 224). Our study uncovered, for the first time, the intricate dynamics between LLM sycophancy and friendliness: When an LLM agent already exhibits a friendly demeanor, being sycophantic reduces perceived authenticity, thereby lowering user trust; Conversely, when the agent is less friendly, aligning its responses with user opinions makes it appear more genuine, leading to higher user trust. Our findings entail profound implications for AI persuasion through exploiting human psychological tendencies and highlight the imperative for responsible designs in user-LLM agent interactions. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10844v2-abstract-full').style.display = 'none'; document.getElementById('2502.10844v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.10248">arXiv:2502.10248</a> <span> [<a href="https://arxiv.org/pdf/2502.10248">pdf</a>, <a href="https://arxiv.org/format/2502.10248">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Step-Video-T2V Technical Report: The Practice, Challenges, and Future of Video Foundation Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ma%2C+G">Guoqing Ma</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+H">Haoyang Huang</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+K">Kun Yan</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+L">Liangyu Chen</a>, <a href="/search/cs?searchtype=author&query=Duan%2C+N">Nan Duan</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+S">Shengming Yin</a>, <a href="/search/cs?searchtype=author&query=Wan%2C+C">Changyi Wan</a>, <a href="/search/cs?searchtype=author&query=Ming%2C+R">Ranchen Ming</a>, <a href="/search/cs?searchtype=author&query=Song%2C+X">Xiaoniu Song</a>, <a 
href="/search/cs?searchtype=author&query=Chen%2C+X">Xing Chen</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Y">Yu Zhou</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+D">Deshan Sun</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+D">Deyu Zhou</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+J">Jian Zhou</a>, <a href="/search/cs?searchtype=author&query=Tan%2C+K">Kaijun Tan</a>, <a href="/search/cs?searchtype=author&query=An%2C+K">Kang An</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+M">Mei Chen</a>, <a href="/search/cs?searchtype=author&query=Ji%2C+W">Wei Ji</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Q">Qiling Wu</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+W">Wen Sun</a>, <a href="/search/cs?searchtype=author&query=Han%2C+X">Xin Han</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+Y">Yanan Wei</a>, <a href="/search/cs?searchtype=author&query=Ge%2C+Z">Zheng Ge</a>, <a href="/search/cs?searchtype=author&query=Li%2C+A">Aojie Li</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+B">Bin Wang</a> , et al. (90 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.10248v2-abstract-short" style="display: inline;"> We present Step-Video-T2V, a state-of-the-art text-to-video pre-trained model with 30B parameters and the ability to generate videos up to 204 frames in length. A deep compression Variational Autoencoder, Video-VAE, is designed for video generation tasks, achieving 16x16 spatial and 8x temporal compression ratios, while maintaining exceptional video reconstruction quality. 
User prompts are encoded… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10248v2-abstract-full').style.display = 'inline'; document.getElementById('2502.10248v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.10248v2-abstract-full" style="display: none;"> We present Step-Video-T2V, a state-of-the-art text-to-video pre-trained model with 30B parameters and the ability to generate videos up to 204 frames in length. A deep compression Variational Autoencoder, Video-VAE, is designed for video generation tasks, achieving 16x16 spatial and 8x temporal compression ratios, while maintaining exceptional video reconstruction quality. User prompts are encoded using two bilingual text encoders to handle both English and Chinese. A DiT with 3D full attention is trained using Flow Matching and is employed to denoise input noise into latent frames. A video-based DPO approach, Video-DPO, is applied to reduce artifacts and improve the visual quality of the generated videos. We also detail our training strategies and share key observations and insights. Step-Video-T2V's performance is evaluated on a novel video generation benchmark, Step-Video-T2V-Eval, demonstrating its state-of-the-art text-to-video quality when compared with both open-source and commercial engines. Additionally, we discuss the limitations of current diffusion-based model paradigm and outline future directions for video foundation models. We make both Step-Video-T2V and Step-Video-T2V-Eval available at https://github.com/stepfun-ai/Step-Video-T2V. The online version can be accessed from https://yuewen.cn/videos as well. Our goal is to accelerate the innovation of video foundation models and empower video content creators. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10248v2-abstract-full').style.display = 'none'; document.getElementById('2502.10248v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">36 pages, 14 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09672">arXiv:2502.09672</a> <span> [<a href="https://arxiv.org/pdf/2502.09672">pdf</a>, <a href="https://arxiv.org/format/2502.09672">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> IMM-MOT: A Novel 3D Multi-object Tracking Framework with Interacting Multiple Model Filter </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xiaohong Liu</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+X">Xulong Zhao</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+G">Gang Liu</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z">Zili Wu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+T">Tao Wang</a>, <a href="/search/cs?searchtype=author&query=Meng%2C+L">Lei Meng</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yuhan Wang</a> </p> <p 
class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09672v1-abstract-short" style="display: inline;"> 3D Multi-Object Tracking (MOT) provides the trajectories of surrounding objects, assisting robots or vehicles in smarter path planning and obstacle avoidance. Existing 3D MOT methods based on the Tracking-by-Detection framework typically use a single motion model to track an object throughout its entire tracking process. However, objects may change their motion patterns due to variations in the su… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09672v1-abstract-full').style.display = 'inline'; document.getElementById('2502.09672v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09672v1-abstract-full" style="display: none;"> 3D Multi-Object Tracking (MOT) provides the trajectories of surrounding objects, assisting robots or vehicles in smarter path planning and obstacle avoidance. Existing 3D MOT methods based on the Tracking-by-Detection framework typically use a single motion model to track an object throughout its entire tracking process. However, objects may change their motion patterns due to variations in the surrounding environment. In this paper, we introduce the Interacting Multiple Model filter in IMM-MOT, which accurately fits the complex motion patterns of individual objects, overcoming the limitation of single-model tracking in existing approaches. In addition, we incorporate a Damping Window mechanism into the trajectory lifecycle management, leveraging the continuous association status of trajectories to control their creation and termination, reducing the occurrence of overlooked low-confidence true targets. 
Furthermore, we propose the Distance-Based Score Enhancement module, which enhances the differentiation between false positives and true positives by adjusting detection scores, thereby improving the effectiveness of the Score Filter. On the NuScenes Val dataset, IMM-MOT outperforms most other single-modal models using 3D point clouds, achieving an AMOTA of 73.8%. Our project is available at https://github.com/Ap01lo/IMM-MOT. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09672v1-abstract-full').style.display = 'none'; document.getElementById('2502.09672v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages,5 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 65D19; 68T40 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09334">arXiv:2502.09334</a> <span> [<a href="https://arxiv.org/pdf/2502.09334">pdf</a>, <a href="https://arxiv.org/format/2502.09334">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> ThunderServe: High-performance and Cost-efficient LLM Serving in Cloud Environments </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jiang%2C+Y">Youhe Jiang</a>, <a href="/search/cs?searchtype=author&query=Fu%2C+F">Fangcheng Fu</a>, <a 
href="/search/cs?searchtype=author&query=Yao%2C+X">Xiaozhe Yao</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+T">Taiyi Wang</a>, <a href="/search/cs?searchtype=author&query=Cui%2C+B">Bin Cui</a>, <a href="/search/cs?searchtype=author&query=Klimovic%2C+A">Ana Klimovic</a>, <a href="/search/cs?searchtype=author&query=Yoneki%2C+E">Eiko Yoneki</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09334v1-abstract-short" style="display: inline;"> Recent developments in large language models (LLMs) have demonstrated their remarkable proficiency in a range of tasks. Compared to in-house homogeneous GPU clusters, deploying LLMs in cloud environments with diverse types of GPUs is crucial for addressing the GPU shortage problem and being more cost-effective. However, the diversity of network environments and various GPU types on the cloud bring… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09334v1-abstract-full').style.display = 'inline'; document.getElementById('2502.09334v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09334v1-abstract-full" style="display: none;"> Recent developments in large language models (LLMs) have demonstrated their remarkable proficiency in a range of tasks. Compared to in-house homogeneous GPU clusters, deploying LLMs in cloud environments with diverse types of GPUs is crucial for addressing the GPU shortage problem and being more cost-effective. However, the diversity of network environments and various GPU types on the cloud bring difficulties to achieving high-performance serving. In this work, we propose ThunderServe, a high-performance and cost-efficient LLM serving system for heterogeneous cloud environments. 
We introduce a novel scheduling algorithm, which optimizes the deployment plan of LLM serving to accommodate the heterogeneous resource and network bandwidth conditions in cloud environments. Furthermore, we propose a lightweight re-scheduling mechanism, designed to adapt to fluctuating online conditions (e.g., node failures, workload shifts) without the need for costly restarts of ongoing services. Empirical results in both heterogeneous cloud and homogeneous in-house environments reveal that ThunderServe delivers up to a 2.1$\times$ and on average a $1.7\times$ increase in throughput and achieves up to a 2.5$\times$ and on average a $1.5\times$ reduction in latency deadlines compared with state-of-the-art systems given the same price budget, suggesting opting for cloud services provides a more cost-efficient solution. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09334v1-abstract-full').style.display = 'none'; document.getElementById('2502.09334v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">MLSys 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.08921">arXiv:2502.08921</a> <span> [<a href="https://arxiv.org/pdf/2502.08921">pdf</a>, <a href="https://arxiv.org/format/2502.08921">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Detecting Malicious Concepts Without Image Generation in AIGC </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xu%2C+K">Kun Xu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yushu Zhang</a>, <a href="/search/cs?searchtype=author&query=Qi%2C+S">Shuren Qi</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+T">Tao Wang</a>, <a href="/search/cs?searchtype=author&query=Wen%2C+W">Wenying Wen</a>, <a href="/search/cs?searchtype=author&query=Fang%2C+Y">Yuming Fang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.08921v1-abstract-short" style="display: inline;"> The task of text-to-image generation has achieved tremendous success in practice, with emerging concept generation models capable of producing highly personalized and customized content. Fervor for concept generation is increasing rapidly among users, and platforms for concept sharing have sprung up. 
The concept owners may upload malicious concepts and disguise them with non-malicious text descrip… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08921v1-abstract-full').style.display = 'inline'; document.getElementById('2502.08921v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.08921v1-abstract-full" style="display: none;"> The task of text-to-image generation has achieved tremendous success in practice, with emerging concept generation models capable of producing highly personalized and customized content. Fervor for concept generation is increasing rapidly among users, and platforms for concept sharing have sprung up. The concept owners may upload malicious concepts and disguise them with non-malicious text descriptions and example images to deceive users into downloading and generating malicious content. The platform needs a quick method to determine whether a concept is malicious to prevent the spread of malicious concepts. However, simply relying on concept image generation to judge whether a concept is malicious requires time and computational resources. Especially, as the number of concepts uploaded and downloaded on the platform continues to increase, this approach becomes impractical and poses a risk of generating malicious content. In this paper, we propose Concept QuickLook, the first systematic work to incorporate malicious concept detection into research, which performs detection based solely on concept files without generating any images. We define malicious concepts and design two work modes for detection: concept matching and fuzzy detection. Extensive experiments demonstrate that the proposed Concept QuickLook can detect malicious concepts and demonstrate practicality in concept sharing platforms. We also design robustness experiments to further validate the effectiveness of the solution. 
We hope this work can initiate malicious concept detection tasks and provide some inspiration. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08921v1-abstract-full').style.display = 'none'; document.getElementById('2502.08921v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.07288">arXiv:2502.07288</a> <span> [<a href="https://arxiv.org/pdf/2502.07288">pdf</a>, <a href="https://arxiv.org/format/2502.07288">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> KPIs 2024 Challenge: Advancing Glomerular Segmentation from Patch- to Slide-Level </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Deng%2C+R">Ruining Deng</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+T">Tianyuan Yao</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+Y">Yucheng Tang</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+J">Junlin Guo</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+S">Siqi Lu</a>, <a href="/search/cs?searchtype=author&query=Xiong%2C+J">Juming Xiong</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+L">Lining Yu</a>, <a href="/search/cs?searchtype=author&query=Cap%2C+Q+H">Quan Huu Cap</a>, <a href="/search/cs?searchtype=author&query=Cai%2C+P">Pengzhou Cai</a>, <a 
href="/search/cs?searchtype=author&query=Lan%2C+L">Libin Lan</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+Z">Ze Zhao</a>, <a href="/search/cs?searchtype=author&query=Galdran%2C+A">Adrian Galdran</a>, <a href="/search/cs?searchtype=author&query=Kumar%2C+A">Amit Kumar</a>, <a href="/search/cs?searchtype=author&query=Deotale%2C+G">Gunjan Deotale</a>, <a href="/search/cs?searchtype=author&query=Das%2C+D+K">Dev Kumar Das</a>, <a href="/search/cs?searchtype=author&query=Paik%2C+I">Inyoung Paik</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+J">Joonho Lee</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+G">Geongyu Lee</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yujia Chen</a>, <a href="/search/cs?searchtype=author&query=Li%2C+W">Wangkai Li</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zhaoyang Li</a>, <a href="/search/cs?searchtype=author&query=Hou%2C+X">Xuege Hou</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z">Zeyuan Wu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shengjin Wang</a>, <a href="/search/cs?searchtype=author&query=Fischer%2C+M">Maximilian Fischer</a> , et al. (22 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.07288v1-abstract-short" style="display: inline;"> Chronic kidney disease (CKD) is a major global health issue, affecting over 10% of the population and causing significant mortality. While kidney biopsy remains the gold standard for CKD diagnosis and treatment, the lack of comprehensive benchmarks for kidney pathology segmentation hinders progress in the field. 
To address this, we organized the Kidney Pathology Image Segmentation (KPIs) Challenge… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07288v1-abstract-full').style.display = 'inline'; document.getElementById('2502.07288v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.07288v1-abstract-full" style="display: none;"> Chronic kidney disease (CKD) is a major global health issue, affecting over 10% of the population and causing significant mortality. While kidney biopsy remains the gold standard for CKD diagnosis and treatment, the lack of comprehensive benchmarks for kidney pathology segmentation hinders progress in the field. To address this, we organized the Kidney Pathology Image Segmentation (KPIs) Challenge, introducing a dataset that incorporates preclinical rodent models of CKD with over 10,000 annotated glomeruli from 60+ Periodic Acid Schiff (PAS)-stained whole slide images. The challenge includes two tasks, patch-level segmentation and whole slide image segmentation and detection, evaluated using the Dice Similarity Coefficient (DSC) and F1-score. By encouraging innovative segmentation methods that adapt to diverse CKD models and tissue conditions, the KPIs Challenge aims to advance kidney pathology analysis, establish new benchmarks, and enable precise, large-scale quantification for disease research and diagnosis. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07288v1-abstract-full').style.display = 'none'; document.getElementById('2502.07288v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.06865">arXiv:2502.06865</a> <span> [<a href="https://arxiv.org/pdf/2502.06865">pdf</a>, <a href="https://arxiv.org/format/2502.06865">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Deep Ritz method with Fourier feature mapping: A deep learning approach for solving variational models of microstructure </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mema%2C+E">Ensela Mema</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+T">Ting Wang</a>, <a href="/search/cs?searchtype=author&query=Knap%2C+J">Jaroslaw Knap</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.06865v1-abstract-short" style="display: inline;"> This paper presents a novel approach that combines the Deep Ritz Method (DRM) with Fourier feature mapping to solve minimization problems comprised of multi-well, non-convex energy potentials. These problems present computational challenges as they lack a global minimum. 
Through an investigation of three benchmark problems in both 1D and 2D, we observe that DRM suffers from spectral bias pathology… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06865v1-abstract-full').style.display = 'inline'; document.getElementById('2502.06865v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.06865v1-abstract-full" style="display: none;"> This paper presents a novel approach that combines the Deep Ritz Method (DRM) with Fourier feature mapping to solve minimization problems comprised of multi-well, non-convex energy potentials. These problems present computational challenges as they lack a global minimum. Through an investigation of three benchmark problems in both 1D and 2D, we observe that DRM suffers from spectral bias pathology, limiting its ability to learn solutions with high frequencies. To overcome this limitation, we modify the method by introducing Fourier feature mapping. This modification involves applying a Fourier mapping to the input layer before it passes through the hidden and output layers. Our results demonstrate that Fourier feature mapping enables DRM to generate high-frequency, multiscale solutions for the benchmark problems in both 1D and 2D, offering a promising advancement in tackling complex non-convex energy minimization problems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06865v1-abstract-full').style.display = 'none'; document.getElementById('2502.06865v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.06173">arXiv:2502.06173</a> <span> [<a href="https://arxiv.org/pdf/2502.06173">pdf</a>, <a href="https://arxiv.org/format/2502.06173">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Applications">stat.AP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Uncertainty-Aware Adaptation of Large Language Models for Protein-Protein Interaction Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jantre%2C+S">Sanket Jantre</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+T">Tianle Wang</a>, <a href="/search/cs?searchtype=author&query=Park%2C+G">Gilchan Park</a>, <a href="/search/cs?searchtype=author&query=Chopra%2C+K">Kriti Chopra</a>, <a href="/search/cs?searchtype=author&query=Jeon%2C+N">Nicholas Jeon</a>, <a href="/search/cs?searchtype=author&query=Qian%2C+X">Xiaoning Qian</a>, <a href="/search/cs?searchtype=author&query=Urban%2C+N+M">Nathan M. 
Urban</a>, <a href="/search/cs?searchtype=author&query=Yoon%2C+B">Byung-Jun Yoon</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.06173v1-abstract-short" style="display: inline;"> Identification of protein-protein interactions (PPIs) helps derive cellular mechanistic understanding, particularly in the context of complex conditions such as neurodegenerative disorders, metabolic syndromes, and cancer. Large Language Models (LLMs) have demonstrated remarkable potential in predicting protein structures and interactions via automated mining of vast biomedical literature; yet the… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06173v1-abstract-full').style.display = 'inline'; document.getElementById('2502.06173v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.06173v1-abstract-full" style="display: none;"> Identification of protein-protein interactions (PPIs) helps derive cellular mechanistic understanding, particularly in the context of complex conditions such as neurodegenerative disorders, metabolic syndromes, and cancer. Large Language Models (LLMs) have demonstrated remarkable potential in predicting protein structures and interactions via automated mining of vast biomedical literature; yet their inherent uncertainty remains a key challenge for deriving reproducible findings, critical for biomedical applications. In this study, we present an uncertainty-aware adaptation of LLMs for PPI analysis, leveraging fine-tuned LLaMA-3 and BioMedGPT models. To enhance prediction reliability, we integrate LoRA ensembles and Bayesian LoRA models for uncertainty quantification (UQ), ensuring confidence-calibrated insights into protein behavior. 
Our approach achieves competitive performance in PPI identification across diverse disease contexts while addressing model uncertainty, thereby enhancing trustworthiness and reproducibility in computational biology. These findings underscore the potential of uncertainty-aware LLM adaptation for advancing precision medicine and biomedical research. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06173v1-abstract-full').style.display = 'none'; document.getElementById('2502.06173v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05189">arXiv:2502.05189</a> <span> [<a href="https://arxiv.org/pdf/2502.05189">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Geophysics">physics.geo-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Physics-Driven Self-Supervised Deep Learning for Free-Surface Multiple Elimination </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sun%2C+J">Jing Sun</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+T">Tiexing Wang</a>, <a href="/search/cs?searchtype=author&query=Verschuur%2C+E">Eric Verschuur</a>, <a href="/search/cs?searchtype=author&query=Vasconcelos%2C+I">Ivan Vasconcelos</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05189v1-abstract-short" 
style="display: inline;"> In recent years, deep learning (DL) has emerged as a promising alternative approach for various seismic processing tasks, including primary estimation (or multiple elimination), a crucial step for accurate subsurface imaging. In geophysics, DL methods are commonly based on supervised learning from large amounts of high-quality labelled data. Instead of relying on traditional supervised learning, i… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05189v1-abstract-full').style.display = 'inline'; document.getElementById('2502.05189v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05189v1-abstract-full" style="display: none;"> In recent years, deep learning (DL) has emerged as a promising alternative approach for various seismic processing tasks, including primary estimation (or multiple elimination), a crucial step for accurate subsurface imaging. In geophysics, DL methods are commonly based on supervised learning from large amounts of high-quality labelled data. Instead of relying on traditional supervised learning, in the context of free-surface multiple elimination, we propose a method in which the DL model learns to effectively parameterize the free-surface multiple-free wavefield from the full wavefield by incorporating the underlying physics into the loss computation. This, in turn, yields high-quality estimates without ever being shown any ground truth data. Currently, the network reparameterization is performed independently for each dataset. We demonstrate its effectiveness through tests on both synthetic and field data. We employ industry-standard Surface-Related Multiple Elimination (SRME) using, respectively, global least-squares adaptive subtraction and local least-squares adaptive subtraction as benchmarks. 
The comparison shows that the proposed method outperforms the benchmarks in estimation accuracy, achieving the most complete primary estimation and the least multiple energy leakage, but at the cost of a higher computational burden. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05189v1-abstract-full').style.display = 'none'; document.getElementById('2502.05189v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05187">arXiv:2502.05187</a> <span> [<a href="https://arxiv.org/pdf/2502.05187">pdf</a>, <a href="https://arxiv.org/format/2502.05187">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> An Adaptable Budget Planner for Enhancing Budget-Constrained Auto-Bidding in Online Advertising </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Duan%2C+Z">Zhijian Duan</a>, <a href="/search/cs?searchtype=author&query=Huo%2C+Y">Yusen Huo</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+T">Tianyu Wang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zhilin Zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yeshu Li</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+C">Chuan Yu</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+J">Jian Xu</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+B">Bo 
Zheng</a>, <a href="/search/cs?searchtype=author&query=Deng%2C+X">Xiaotie Deng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05187v1-abstract-short" style="display: inline;"> In online advertising, advertisers commonly utilize auto-bidding services to bid for impression opportunities. A typical objective of the auto-bidder is to optimize the advertiser's cumulative value of winning impressions within specified budget constraints. However, such a problem is challenging due to the complex bidding environment faced by diverse advertisers. To address this challenge, we int… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05187v1-abstract-full').style.display = 'inline'; document.getElementById('2502.05187v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05187v1-abstract-full" style="display: none;"> In online advertising, advertisers commonly utilize auto-bidding services to bid for impression opportunities. A typical objective of the auto-bidder is to optimize the advertiser's cumulative value of winning impressions within specified budget constraints. However, such a problem is challenging due to the complex bidding environment faced by diverse advertisers. To address this challenge, we introduce ABPlanner, a few-shot adaptable budget planner designed to improve budget-constrained auto-bidding. ABPlanner is based on a hierarchical bidding framework that decomposes the bidding process into shorter, manageable stages. Within this framework, ABPlanner allocates the budget across all stages, allowing a low-level auto-bidder to bids based on the budget allocation plan. The adaptability of ABPlanner is achieved through a sequential decision-making approach, inspired by in-context reinforcement learning. 
For each advertiser, ABPlanner adjusts the budget allocation plan episode by episode, using data from previous episodes as prompt for current decisions. This enables ABPlanner to quickly adapt to different advertisers with few-shot data, providing a sample-efficient solution. Extensive simulation experiments and real-world A/B testing validate the effectiveness of ABPlanner, demonstrating its capability to enhance the cumulative value achieved by auto-bidders. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05187v1-abstract-full').style.display = 'none'; document.getElementById('2502.05187v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">In KDD 2025 ADS Track August</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05001">arXiv:2502.05001</a> <span> [<a href="https://arxiv.org/pdf/2502.05001">pdf</a>, <a href="https://arxiv.org/format/2502.05001">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Databases">cs.DB</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> A New Paradigm in Tuning Learned Indexes: A Reinforcement Learning Enhanced Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a 
href="/search/cs?searchtype=author&query=Wang%2C+T">Taiyi Wang</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+L">Liang Liang</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+G">Guang Yang</a>, <a href="/search/cs?searchtype=author&query=Heinis%2C+T">Thomas Heinis</a>, <a href="/search/cs?searchtype=author&query=Yoneki%2C+E">Eiko Yoneki</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05001v2-abstract-short" style="display: inline;"> Learned Index Structures (LIS) have significantly advanced data management by leveraging machine learning models to optimize data indexing. However, designing these structures often involves critical trade-offs, making it challenging for both designers and end-users to find an optimal balance tailored to specific workloads and scenarios. While some indexes offer adjustable parameters that demand i… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05001v2-abstract-full').style.display = 'inline'; document.getElementById('2502.05001v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05001v2-abstract-full" style="display: none;"> Learned Index Structures (LIS) have significantly advanced data management by leveraging machine learning models to optimize data indexing. However, designing these structures often involves critical trade-offs, making it challenging for both designers and end-users to find an optimal balance tailored to specific workloads and scenarios. While some indexes offer adjustable parameters that demand intensive manual tuning, others rely on fixed configurations based on heuristic auto-tuners or expert knowledge, which may not consistently deliver optimal performance. 
This paper introduces LITune, a novel framework for end-to-end automatic tuning of Learned Index Structures. LITune employs an adaptive training pipeline equipped with a tailor-made Deep Reinforcement Learning (DRL) approach to ensure stable and efficient tuning. To accommodate long-term dynamics arising from online tuning, we further enhance LITune with an on-the-fly updating mechanism termed the O2 system. These innovations allow LITune to effectively capture state transitions in online tuning scenarios and dynamically adjust to changing data distributions and workloads, marking a significant improvement over other tuning methods. Our experimental results demonstrate that LITune achieves up to a 98% reduction in runtime and a 17-fold increase in throughput compared to default parameter settings given a selected Learned Index instance. These findings highlight LITune's effectiveness and its potential to facilitate broader adoption of LIS in real-world applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05001v2-abstract-full').style.display = 'none'; document.getElementById('2502.05001v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">15 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.04899">arXiv:2502.04899</a> <span> [<a href="https://arxiv.org/pdf/2502.04899">pdf</a>, <a href="https://arxiv.org/format/2502.04899">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Unified Approaches in Self-Supervised Event Stream Modeling: Progress and Prospects </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Z%C3%B3lyomi%2C+L">Levente Zólyomi</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+T">Tianze Wang</a>, <a href="/search/cs?searchtype=author&query=Ennadir%2C+S">Sofiane Ennadir</a>, <a href="/search/cs?searchtype=author&query=Smirnov%2C+O">Oleg Smirnov</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+L">Lele Cao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.04899v1-abstract-short" style="display: inline;"> The proliferation of digital interactions across diverse domains, such as healthcare, e-commerce, gaming, and finance, has resulted in the generation of vast volumes of event stream (ES) data. ES data comprises continuous sequences of timestamped events that encapsulate detailed contextual information relevant to each domain. 
While ES data holds significant potential for extracting actionable insi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04899v1-abstract-full').style.display = 'inline'; document.getElementById('2502.04899v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.04899v1-abstract-full" style="display: none;"> The proliferation of digital interactions across diverse domains, such as healthcare, e-commerce, gaming, and finance, has resulted in the generation of vast volumes of event stream (ES) data. ES data comprises continuous sequences of timestamped events that encapsulate detailed contextual information relevant to each domain. While ES data holds significant potential for extracting actionable insights and enhancing decision-making, its effective utilization is hindered by challenges such as the scarcity of labeled data and the fragmented nature of existing research efforts. Self-Supervised Learning (SSL) has emerged as a promising paradigm to address these challenges by enabling the extraction of meaningful representations from unlabeled ES data. In this survey, we systematically review and synthesize SSL methodologies tailored for ES modeling across multiple domains, bridging the gaps between domain-specific approaches that have traditionally operated in isolation. We present a comprehensive taxonomy of SSL techniques, encompassing both predictive and contrastive paradigms, and analyze their applicability and effectiveness within different application contexts. Furthermore, we identify critical gaps in current research and propose a future research agenda aimed at developing scalable, domain-agnostic SSL frameworks for ES modeling. By unifying disparate research efforts and highlighting cross-domain synergies, this survey aims to accelerate innovation, improve reproducibility, and expand the applicability of SSL to diverse real-world ES challenges. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04899v1-abstract-full').style.display = 'none'; document.getElementById('2502.04899v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.04116">arXiv:2502.04116</a> <span> [<a href="https://arxiv.org/pdf/2502.04116">pdf</a>, <a href="https://arxiv.org/format/2502.04116">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Generative Adversarial Networks Bridging Art and Machine Intelligence </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Song%2C+J">Junhao Song</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yichao Zhang</a>, <a href="/search/cs?searchtype=author&query=Bi%2C+Z">Ziqian Bi</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+T">Tianyang Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Keyu Chen</a>, <a href="/search/cs?searchtype=author&query=Li%2C+M">Ming Li</a>, <a href="/search/cs?searchtype=author&query=Niu%2C+Q">Qian Niu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Junyu Liu</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+B">Benji Peng</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Sen Zhang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+M">Ming Liu</a>, <a 
href="/search/cs?searchtype=author&query=Xu%2C+J">Jiawei Xu</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+X">Xuanhe Pan</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jinlang Wang</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+P">Pohsun Feng</a>, <a href="/search/cs?searchtype=author&query=Wen%2C+Y">Yizhu Wen</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+L+K+Q">Lawrence K. Q. Yan</a>, <a href="/search/cs?searchtype=author&query=Tseng%2C+H">Hong-Ming Tseng</a>, <a href="/search/cs?searchtype=author&query=Song%2C+X">Xinyuan Song</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+J">Jintao Ren</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+S">Silin Chen</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yunze Wang</a>, <a href="/search/cs?searchtype=author&query=Hsieh%2C+W">Weiche Hsieh</a>, <a href="/search/cs?searchtype=author&query=Jing%2C+B">Bowen Jing</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+J">Junjie Yang</a> , et al. (3 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.04116v2-abstract-short" style="display: inline;"> Generative Adversarial Networks (GAN) have greatly influenced the development of computer vision and artificial intelligence in the past decade and also connected art and machine intelligence together. 
This book begins with a detailed introduction to the fundamental principles and historical development of GANs, contrasting them with traditional generative models and elucidating the core adversari… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04116v2-abstract-full').style.display = 'inline'; document.getElementById('2502.04116v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.04116v2-abstract-full" style="display: none;"> Generative Adversarial Networks (GAN) have greatly influenced the development of computer vision and artificial intelligence in the past decade and also connected art and machine intelligence together. This book begins with a detailed introduction to the fundamental principles and historical development of GANs, contrasting them with traditional generative models and elucidating the core adversarial mechanisms through illustrative Python examples. The text systematically addresses the mathematical and theoretical underpinnings including probability theory, statistics, and game theory providing a solid framework for understanding the objectives, loss functions, and optimisation challenges inherent to GAN training. Subsequent chapters review classic variants such as Conditional GANs, DCGANs, InfoGAN, and LAPGAN before progressing to advanced training methodologies like Wasserstein GANs, GANs with gradient penalty, least squares GANs, and spectral normalisation techniques. The book further examines architectural enhancements and task-specific adaptations in generators and discriminators, showcasing practical implementations in high resolution image generation, artistic style transfer, video synthesis, text to image generation and other multimedia applications. 
The concluding sections offer insights into emerging research trends, including self-attention mechanisms, transformer-based generative models, and a comparative analysis with diffusion models, thus charting promising directions for future developments in both academic and applied settings. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04116v2-abstract-full').style.display = 'none'; document.getElementById('2502.04116v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.03478">arXiv:2502.03478</a> <span> [<a href="https://arxiv.org/pdf/2502.03478">pdf</a>, <a href="https://arxiv.org/ps/2502.03478">ps</a>, <a href="https://arxiv.org/format/2502.03478">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> </div> </div> <p class="title is-5 mathjax"> From In Silico to In Vitro: A Comprehensive Guide to Validating Bioinformatics Findings </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+T">Tianyang Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+S">Silin Chen</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yunze Wang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yichao Zhang</a>, <a 
href="/search/cs?searchtype=author&query=Song%2C+X">Xinyuan Song</a>, <a href="/search/cs?searchtype=author&query=Bi%2C+Z">Ziqian Bi</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+M">Ming Liu</a>, <a href="/search/cs?searchtype=author&query=Niu%2C+Q">Qian Niu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Junyu Liu</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+P">Pohsun Feng</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+X">Xintian Sun</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+B">Benji Peng</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+C">Charles Zhang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Keyu Chen</a>, <a href="/search/cs?searchtype=author&query=Li%2C+M">Ming Li</a>, <a href="/search/cs?searchtype=author&query=Fei%2C+C">Cheng Fei</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+L+K">Lawrence KQ Yan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.03478v1-abstract-short" style="display: inline;"> The integration of bioinformatics predictions and experimental validation plays a pivotal role in advancing biological research, from understanding molecular mechanisms to developing therapeutic strategies. 
Bioinformatics tools and methods offer powerful means for predicting gene functions, protein interactions, and regulatory networks, but these predictions must be validated through experimental… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03478v1-abstract-full').style.display = 'inline'; document.getElementById('2502.03478v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.03478v1-abstract-full" style="display: none;"> The integration of bioinformatics predictions and experimental validation plays a pivotal role in advancing biological research, from understanding molecular mechanisms to developing therapeutic strategies. Bioinformatics tools and methods offer powerful means for predicting gene functions, protein interactions, and regulatory networks, but these predictions must be validated through experimental approaches to ensure their biological relevance. This review explores the various methods and technologies used for experimental validation, including gene expression analysis, protein-protein interaction verification, and pathway validation. We also discuss the challenges involved in translating computational predictions to experimental settings and highlight the importance of collaboration between bioinformatics and experimental research. Finally, emerging technologies, such as CRISPR gene editing, next-generation sequencing, and artificial intelligence, are shaping the future of bioinformatics validation and driving more accurate and efficient biological discoveries. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03478v1-abstract-full').style.display = 'none'; document.getElementById('2502.03478v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.02749">arXiv:2502.02749</a> <span> [<a href="https://arxiv.org/pdf/2502.02749">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> Unveiling Privacy and Security Gaps in Female Health Apps </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hassan%2C+M">Muhammad Hassan</a>, <a href="/search/cs?searchtype=author&query=Jameel%2C+M">Mahnoor Jameel</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+T">Tian Wang</a>, <a href="/search/cs?searchtype=author&query=Bashir%2C+M">Masooda Bashir</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.02749v1-abstract-short" style="display: inline;"> Female Health Applications (FHA), a growing segment of FemTech, aim to provide affordable and accessible healthcare solutions for women globally. 
These applications gather and monitor health and reproductive data from millions of users. With ongoing debates on women's reproductive rights and privacy, it's crucial to assess how these apps protect users' privacy. In this paper, we undertake a securi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02749v1-abstract-full').style.display = 'inline'; document.getElementById('2502.02749v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.02749v1-abstract-full" style="display: none;"> Female Health Applications (FHA), a growing segment of FemTech, aim to provide affordable and accessible healthcare solutions for women globally. These applications gather and monitor health and reproductive data from millions of users. With ongoing debates on women's reproductive rights and privacy, it's crucial to assess how these apps protect users' privacy. In this paper, we undertake a security and data protection assessment of 45 popular FHAs. Our investigation uncovers harmful permissions, extensive collection of sensitive personal and medical data, and the presence of numerous third-party tracking libraries. Furthermore, our examination of their privacy policies reveals deviations from fundamental data privacy principles. These findings highlight a significant lack of privacy and security measures for FemTech apps, especially as women's reproductive rights face growing political challenges. The results and recommendations provide valuable insights for users, app developers, and policymakers, paving the way for better privacy and security in Female Health Applications. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02749v1-abstract-full').style.display = 'none'; document.getElementById('2502.02749v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.02590">arXiv:2502.02590</a> <span> [<a href="https://arxiv.org/pdf/2502.02590">pdf</a>, <a href="https://arxiv.org/format/2502.02590">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Articulate AnyMesh: Open-Vocabulary 3D Articulated Objects Modeling </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Qiu%2C+X">Xiaowen Qiu</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+J">Jincheng Yang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yian Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Z">Zhehuan Chen</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yufei Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+T">Tsun-Hsuan Wang</a>, <a href="/search/cs?searchtype=author&query=Xian%2C+Z">Zhou Xian</a>, <a href="/search/cs?searchtype=author&query=Gan%2C+C">Chuang Gan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.02590v1-abstract-short" style="display: inline;"> 3D 
articulated objects modeling has long been a challenging problem, since it requires to capture both accurate surface geometries and semantically meaningful and spatially precise structures, parts, and joints. Existing methods heavily depend on training data from a limited set of handcrafted articulated object categories (e.g., cabinets and drawers), which restricts their ability to model a wide… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02590v1-abstract-full').style.display = 'inline'; document.getElementById('2502.02590v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.02590v1-abstract-full" style="display: none;"> 3D articulated objects modeling has long been a challenging problem, since it requires to capture both accurate surface geometries and semantically meaningful and spatially precise structures, parts, and joints. Existing methods heavily depend on training data from a limited set of handcrafted articulated object categories (e.g., cabinets and drawers), which restricts their ability to model a wide range of articulated objects in an open-vocabulary context. To address these limitations, we propose Articulate Anymesh, an automated framework that is able to convert any rigid 3D mesh into its articulated counterpart in an open-vocabulary manner. Given a 3D mesh, our framework utilizes advanced Vision-Language Models and visual prompting techniques to extract semantic information, allowing for both the segmentation of object parts and the construction of functional joints. Our experiments show that Articulate Anymesh can generate large-scale, high-quality 3D articulated objects, including tools, toys, mechanical devices, and vehicles, significantly expanding the coverage of existing 3D articulated object datasets. 
Additionally, we show that these generated assets can facilitate the acquisition of new articulated object manipulation skills in simulation, which can then be transferred to a real robotic system. Our Github website is https://articulate-anymesh.github.io. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02590v1-abstract-full').style.display = 'none'; document.getElementById('2502.02590v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.01401">arXiv:2502.01401</a> <span> [<a href="https://arxiv.org/pdf/2502.01401">pdf</a>, <a href="https://arxiv.org/format/2502.01401">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Evolving Symbolic 3D Visual Grounder with Weakly Supervised Reflection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mi%2C+B">Boyu Mi</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Hanqing Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+T">Tai Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yilun Chen</a>, <a href="/search/cs?searchtype=author&query=Pang%2C+J">Jiangmiao Pang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.01401v2-abstract-short" style="display: inline;"> 3D visual grounding (3DVG) is challenging because of the 
requirement of understanding on visual information, language and spatial relationships. While supervised approaches have achieved superior performance, they are constrained by the scarcity and high cost of 3D vision-language datasets. On the other hand, LLM/VLM based agents are proposed for 3DVG, eliminating the need for training data. Howev… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01401v2-abstract-full').style.display = 'inline'; document.getElementById('2502.01401v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.01401v2-abstract-full" style="display: none;"> 3D visual grounding (3DVG) is challenging because of the requirement of understanding on visual information, language and spatial relationships. While supervised approaches have achieved superior performance, they are constrained by the scarcity and high cost of 3D vision-language datasets. On the other hand, LLM/VLM based agents are proposed for 3DVG, eliminating the need for training data. However, these methods incur prohibitive time and token costs during inference. To address the challenges, we introduce a novel training-free symbolic framework for 3D visual grounding, namely Evolvable Symbolic Visual Grounder, that offers significantly reduced inference costs compared to previous agent-based methods while maintaining comparable performance. EaSe uses LLM generated codes to compute on spatial relationships. EaSe also implements an automatic pipeline to evaluate and optimize the quality of these codes and integrate VLMs to assist in the grounding process. Experimental results demonstrate that EaSe achieves 52.9% accuracy on Nr3D dataset and 49.2% Acc@0.25 on ScanRefer, which is top-tier among training-free methods. Moreover, it substantially reduces the inference time and cost, offering a balanced trade-off between performance and efficiency. 
Codes are available at https://github.com/OpenRobotLab/EaSe. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01401v2-abstract-full').style.display = 'none'; document.getElementById('2502.01401v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00653">arXiv:2502.00653</a> <span> [<a href="https://arxiv.org/pdf/2502.00653">pdf</a>, <a href="https://arxiv.org/format/2502.00653">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> Towards Robust Multimodal Large Language Models Against Jailbreak Attacks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yin%2C+Z">Ziyi Yin</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+Y">Yuanpu Cao</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+H">Han Liu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+T">Ting Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+J">Jinghui Chen</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+F">Fenhlong Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.00653v1-abstract-short" style="display: inline;"> While multimodal large language models (MLLMs) have achieved remarkable success in recent advancements, their 
susceptibility to jailbreak attacks has come to light. In such attacks, adversaries exploit carefully crafted prompts to coerce models into generating harmful or undesirable content. Existing defense mechanisms often rely on external inference steps or safety alignment training, both of wh… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00653v1-abstract-full').style.display = 'inline'; document.getElementById('2502.00653v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.00653v1-abstract-full" style="display: none;"> While multimodal large language models (MLLMs) have achieved remarkable success in recent advancements, their susceptibility to jailbreak attacks has come to light. In such attacks, adversaries exploit carefully crafted prompts to coerce models into generating harmful or undesirable content. Existing defense mechanisms often rely on external inference steps or safety alignment training, both of which are less effective and impractical when facing sophisticated adversarial perturbations in white-box scenarios. To address these challenges and bolster MLLM robustness, we introduce SafeMLLM by adopting an adversarial training framework that alternates between an attack step for generating adversarial noise and a model updating step. At the attack step, SafeMLLM generates adversarial perturbations through a newly proposed contrastive embedding attack (CoE-Attack), which optimizes token embeddings under a contrastive objective. SafeMLLM then updates model parameters to neutralize the perturbation effects while preserving model utility on benign inputs. We evaluate SafeMLLM across six MLLMs and six jailbreak methods spanning multiple modalities. Experimental results show that SafeMLLM effectively defends against diverse attacks, maintaining robust performance and utilities. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00653v1-abstract-full').style.display = 'none'; document.getElementById('2502.00653v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00043">arXiv:2502.00043</a> <span> [<a href="https://arxiv.org/pdf/2502.00043">pdf</a>, <a href="https://arxiv.org/ps/2502.00043">ps</a>, <a href="https://arxiv.org/format/2502.00043">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> A scalable adaptive deep Koopman predictive controller for real-time optimization of mixed traffic flow </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lyu%2C+H">Hao Lyu</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+Y">Yanyong Guo</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+P">Pan Liu</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+N">Nan Zheng</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+T">Ting Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.00043v1-abstract-short" style="display: inline;"> The use of connected automated vehicle (CAV) is advocated to mitigate traffic oscillations in mixed traffic flow consisting of CAVs and human driven vehicles 
(HDVs). This study proposes an adaptive deep Koopman predictive control framework (AdapKoopPC) for regulating mixed traffic flow. Firstly, a Koopman theory-based adaptive trajectory prediction deep network (AdapKoopnet) is designed for modeli… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00043v1-abstract-full').style.display = 'inline'; document.getElementById('2502.00043v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.00043v1-abstract-full" style="display: none;"> The use of connected automated vehicle (CAV) is advocated to mitigate traffic oscillations in mixed traffic flow consisting of CAVs and human driven vehicles (HDVs). This study proposes an adaptive deep Koopman predictive control framework (AdapKoopPC) for regulating mixed traffic flow. Firstly, a Koopman theory-based adaptive trajectory prediction deep network (AdapKoopnet) is designed for modeling HDVs car-following behavior. AdapKoopnet enables the representation of HDVs behavior by a linear model in a high-dimensional space. Secondly, the model predictive control is employed to smooth the mixed traffic flow, where the combination of the linear dynamic model of CAVs and linear prediction blocks from AdapKoopnet is embedded as the predictive model into the AdapKoopPC. Finally, the predictive performance of the proposed AdapKoopnet is verified using the HighD naturalistic driving dataset. Furthermore, the control performance of AdapKoopPC is validated by the numerical simulations. Results demonstrate that the AdapKoopnet provides more accurate HDV predicted trajectories than the baseline nonlinear models. Moreover, the proposed AdapKoopPC exhibits more effective control performance with less computation cost compared with baselines in mitigating traffic oscillations, especially at the low CAVs penetration rates. The code of proposed AdapKoopPC is open source. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00043v1-abstract-full').style.display = 'none'; document.getElementById('2502.00043v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.19135">arXiv:2501.19135</a> <span> [<a href="https://arxiv.org/pdf/2501.19135">pdf</a>, <a href="https://arxiv.org/format/2501.19135">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> </div> </div> <p class="title is-5 mathjax"> A Tensor-Train Decomposition based Compression of LLMs on Group Vector Systolic Accelerator </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huang%2C+S">Sixiao Huang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+T">Tintin Wang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+A">Ang Li</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+A">Ao Shen</a>, <a href="/search/cs?searchtype=author&query=Li%2C+K">Kai Li</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+K">Keyao Jiang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+M">Mingqiang Huang</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+H">Hao Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.19135v1-abstract-short" style="display: inline;"> Large language models (LLMs) are both storage-intensive and computation-intensive, posing significant challenges 
when deployed on resource-constrained hardware. As linear layers in LLMs are mainly resource consuming parts, this paper develops a tensor-train decomposition (TTD) for LLMs with a further hardware implementation on FPGA. TTD compression is applied to the linear layers in ChatGLM3-6B an… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.19135v1-abstract-full').style.display = 'inline'; document.getElementById('2501.19135v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.19135v1-abstract-full" style="display: none;"> Large language models (LLMs) are both storage-intensive and computation-intensive, posing significant challenges when deployed on resource-constrained hardware. As linear layers in LLMs are mainly resource consuming parts, this paper develops a tensor-train decomposition (TTD) for LLMs with a further hardware implementation on FPGA. TTD compression is applied to the linear layers in ChatGLM3-6B and LLaMA2-7B models with compression ratios (CRs) for the whole network 1.94$\times$ and 1.60$\times$, respectively. The compressed LLMs are further implemented on FPGA hardware within a highly efficient group vector systolic array (GVSA) architecture, which has DSP-shared parallel vector PEs for TTD inference, as well as optimized data communication in the accelerator. Experimental results show that the corresponding TTD based LLM accelerator implemented on FPGA achieves 1.45$\times$ and 1.57$\times$ reduction in first token delay for ChatGLM3-6B and LLaMA2-7B models, respectively. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.19135v1-abstract-full').style.display = 'none'; document.getElementById('2501.19135v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.18659">arXiv:2501.18659</a> <span> [<a href="https://arxiv.org/pdf/2501.18659">pdf</a>, <a href="https://arxiv.org/format/2501.18659">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> SAFL: Structure-Aware Personalized Federated Learning via Client-Specific Clustering and SCSI-Guided Model Pruning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+N">Nan Li</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xiaolu Wang</a>, <a href="/search/cs?searchtype=author&query=Du%2C+X">Xiao Du</a>, <a href="/search/cs?searchtype=author&query=Cai%2C+P">Puyu Cai</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+T">Ting Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.18659v1-abstract-short" style="display: inline;"> Federated Learning (FL) enables clients to collaboratively train machine learning models without sharing local data, preserving privacy in diverse environments. 
While traditional FL approaches preserve privacy, they often struggle with high computational and communication overhead. To address these issues, model pruning is introduced as a strategy to streamline computations. However, existing prun… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18659v1-abstract-full').style.display = 'inline'; document.getElementById('2501.18659v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.18659v1-abstract-full" style="display: none;"> Federated Learning (FL) enables clients to collaboratively train machine learning models without sharing local data, preserving privacy in diverse environments. While traditional FL approaches preserve privacy, they often struggle with high computational and communication overhead. To address these issues, model pruning is introduced as a strategy to streamline computations. However, existing pruning methods, when applied solely based on local data, often produce sub-models that inadequately reflect clients' specific tasks due to data insufficiency. To overcome these challenges, this paper introduces SAFL (Structure-Aware Federated Learning), a novel framework that enhances personalized federated learning through client-specific clustering and Similar Client Structure Information (SCSI)-guided model pruning. SAFL employs a two-stage process: initially, it groups clients based on data similarities and uses aggregated pruning criteria to guide the pruning process, facilitating the identification of optimal sub-models. Subsequently, clients train these pruned models and engage in server-based aggregation, ensuring tailored and efficient models for each client. This method significantly reduces computational overhead while improving inference accuracy. 
Extensive experiments demonstrate that SAFL markedly diminishes model size and improves performance, making it highly effective in federated environments characterized by heterogeneous data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18659v1-abstract-full').style.display = 'none'; document.getElementById('2501.18659v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.18099">arXiv:2501.18099</a> <span> [<a href="https://arxiv.org/pdf/2501.18099">pdf</a>, <a href="https://arxiv.org/format/2501.18099">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Learning to Plan & Reason for Evaluation with Thinking-LLM-as-a-Judge </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Saha%2C+S">Swarnadeep Saha</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xian Li</a>, <a href="/search/cs?searchtype=author&query=Ghazvininejad%2C+M">Marjan Ghazvininejad</a>, <a href="/search/cs?searchtype=author&query=Weston%2C+J">Jason Weston</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+T">Tianlu Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.18099v1-abstract-short" style="display: inline;"> LLM-as-a-Judge 
models generate chain-of-thought (CoT) sequences intended to capture the step-by-step reasoning process that underlies the final evaluation of a response. However, due to the lack of human annotated CoTs for evaluation, the required components and structure of effective reasoning traces remain understudied. Consequently, previous approaches often (1) constrain reasoning traces to han… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18099v1-abstract-full').style.display = 'inline'; document.getElementById('2501.18099v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.18099v1-abstract-full" style="display: none;"> LLM-as-a-Judge models generate chain-of-thought (CoT) sequences intended to capture the step-by-step reasoning process that underlies the final evaluation of a response. However, due to the lack of human annotated CoTs for evaluation, the required components and structure of effective reasoning traces remain understudied. Consequently, previous approaches often (1) constrain reasoning traces to hand-designed components, such as a list of criteria, reference answers, or verification questions and (2) structure them such that planning is intertwined with the reasoning for evaluation. In this work, we propose EvalPlanner, a preference optimization algorithm for Thinking-LLM-as-a-Judge that first generates an unconstrained evaluation plan, followed by its execution, and then the final judgment. In a self-training loop, EvalPlanner iteratively optimizes over synthetically constructed evaluation plans and executions, leading to better final verdicts. Our method achieves a new state-of-the-art performance for generative reward models on RewardBench (with a score of 93.9), despite being trained on fewer amount of, and synthetically generated, preference pairs. 
Additional experiments on other benchmarks like RM-Bench, JudgeBench, and FollowBenchEval further highlight the utility of both planning and reasoning for building robust LLM-as-a-Judge reasoning models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18099v1-abstract-full').style.display = 'none'; document.getElementById('2501.18099v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.17750">arXiv:2501.17750</a> <span> [<a href="https://arxiv.org/pdf/2501.17750">pdf</a>, <a href="https://arxiv.org/format/2501.17750">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> Privacy Audit as Bits Transmission: (Im)possibilities for Audit by One Run </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xiang%2C+Z">Zihang Xiang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+T">Tianhao Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+D">Di Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.17750v1-abstract-short" style="display: inline;"> Auditing algorithms' privacy typically involves simulating a game-based protocol that guesses which of two adjacent datasets was the original input. Traditional approaches require thousands of such simulations, leading to significant computational overhead. 
Recent methods propose single-run auditing of the target algorithm to address this, substantially reducing computational cost. However, these… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.17750v1-abstract-full').style.display = 'inline'; document.getElementById('2501.17750v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.17750v1-abstract-full" style="display: none;"> Auditing algorithms' privacy typically involves simulating a game-based protocol that guesses which of two adjacent datasets was the original input. Traditional approaches require thousands of such simulations, leading to significant computational overhead. Recent methods propose single-run auditing of the target algorithm to address this, substantially reducing computational cost. However, these methods' general applicability and tightness in producing empirical privacy guarantees remain uncertain. This work studies such problems in detail. Our contributions are twofold: First, we introduce a unifying framework for privacy audits based on information-theoretic principles, modeling the audit as a bit transmission problem in a noisy channel. This formulation allows us to derive fundamental limits and develop an audit approach that yields tight privacy lower bounds for various DP protocols. Second, leveraging this framework, we demystify the method of privacy audit by one run, identifying the conditions under which single-run audits are feasible or infeasible. Our analysis provides general guidelines for conducting privacy audits and offers deeper insights into the privacy audit. Finally, through experiments, we demonstrate that our approach produces tighter privacy lower bounds on common differentially private mechanisms while requiring significantly fewer observations. 
We also provide a case study illustrating that our method successfully detects privacy violations in flawed implementations of private algorithms. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.17750v1-abstract-full').style.display = 'none'; document.getElementById('2501.17750v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.17296">arXiv:2501.17296</a> <span> [<a href="https://arxiv.org/pdf/2501.17296">pdf</a>, <a href="https://arxiv.org/format/2501.17296">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Multi-Physics Simulations via Coupled Fourier Neural Operator </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+S">Shibo Li</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+T">Tao Wang</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+Y">Yifei Sun</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+H">Hewei Tang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.17296v2-abstract-short" style="display: inline;"> Physical simulations are essential tools across critical fields such as mechanical and aerospace engineering, chemistry, meteorology, etc. 
While neural operators, particularly the Fourier Neural Operator (FNO), have shown promise in predicting simulation results with impressive performance and efficiency, they face limitations when handling real-world scenarios involving coupled multi-physics outp… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.17296v2-abstract-full').style.display = 'inline'; document.getElementById('2501.17296v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.17296v2-abstract-full" style="display: none;"> Physical simulations are essential tools across critical fields such as mechanical and aerospace engineering, chemistry, meteorology, etc. While neural operators, particularly the Fourier Neural Operator (FNO), have shown promise in predicting simulation results with impressive performance and efficiency, they face limitations when handling real-world scenarios involving coupled multi-physics outputs. Current neural operator methods either overlook the correlations between multiple physical processes or employ simplistic architectures that inadequately capture these relationships. To overcome these challenges, we introduce a novel coupled multi-physics neural operator learning (COMPOL) framework that extends the capabilities of Fourier operator layers to model interactions among multiple physical processes. Our approach implements feature aggregation through recurrent and attention mechanisms, enabling comprehensive modeling of coupled interactions. Our method's core is an innovative system for aggregating latent features from multi-physics processes. These aggregated features serve as enriched information sources for neural operator layers, allowing our framework to capture complex physical relationships accurately. 
We evaluated our coupled multi-physics neural operator across diverse physical simulation tasks, including biological systems, fluid mechanics, and multiphase flow in porous media. Our proposed model demonstrates a two to three-fold improvement in predictive performance compared to existing approaches. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.17296v2-abstract-full').style.display = 'none'; document.getElementById('2501.17296v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.16966">arXiv:2501.16966</a> <span> [<a href="https://arxiv.org/pdf/2501.16966">pdf</a>, <a href="https://arxiv.org/format/2501.16966">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Heterogeneity-aware Personalized Federated Learning via Adaptive Dual-Agent Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xi Chen</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Q">Qin Li</a>, <a href="/search/cs?searchtype=author&query=Cai%2C+H">Haibin Cai</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+T">Ting Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span 
class="abstract-short has-text-grey-dark mathjax" id="2501.16966v1-abstract-short" style="display: inline;"> Federated Learning (FL) empowers multiple clients to collaboratively train machine learning models without sharing local data, making it highly applicable in heterogeneous Internet of Things (IoT) environments. However, intrinsic heterogeneity in clients' model architectures and computing capabilities often results in model accuracy loss and the intractable straggler problem, which significantly i… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.16966v1-abstract-full').style.display = 'inline'; document.getElementById('2501.16966v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.16966v1-abstract-full" style="display: none;"> Federated Learning (FL) empowers multiple clients to collaboratively train machine learning models without sharing local data, making it highly applicable in heterogeneous Internet of Things (IoT) environments. However, intrinsic heterogeneity in clients' model architectures and computing capabilities often results in model accuracy loss and the intractable straggler problem, which significantly impairs training effectiveness. To tackle these challenges, this paper proposes a novel Heterogeneity-aware Personalized Federated Learning method, named HAPFL, via multi-level Reinforcement Learning (RL) mechanisms. HAPFL optimizes the training process by incorporating three strategic components: 1) An RL-based heterogeneous model allocation mechanism. The parameter server employs a Proximal Policy Optimization (PPO)-based RL agent to adaptively allocate appropriately sized, differentiated models to clients based on their performance, effectively mitigating performance disparities. 2) An RL-based training intensity adjustment scheme. 
The parameter server leverages another PPO-based RL agent to dynamically fine-tune the training intensity for each client to further enhance training efficiency and reduce straggling latency. 3) A knowledge distillation-based mutual learning mechanism. Each client deploys both a heterogeneous local model and a homogeneous lightweight model named LiteModel, where these models undergo mutual learning through knowledge distillation. This uniform LiteModel plays a pivotal role in aggregating and sharing global knowledge, significantly enhancing the effectiveness of personalized local training. Experimental results across multiple benchmark datasets demonstrate that HAPFL not only achieves high accuracy but also substantially reduces the overall training time by 20.9%-40.4% and decreases straggling latency by 19.0%-48.0% compared to existing solutions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.16966v1-abstract-full').style.display = 'none'; document.getElementById('2501.16966v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.16368">arXiv:2501.16368</a> <span> [<a href="https://arxiv.org/pdf/2501.16368">pdf</a>, <a href="https://arxiv.org/format/2501.16368">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Foundation Models for CPS-IoT: Opportunities and Challenges </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Baris%2C+O">Ozan Baris</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yizhuo Chen</a>, <a href="/search/cs?searchtype=author&query=Dong%2C+G">Gaofeng Dong</a>, <a href="/search/cs?searchtype=author&query=Han%2C+L">Liying Han</a>, <a href="/search/cs?searchtype=author&query=Kimura%2C+T">Tomoyoshi Kimura</a>, <a href="/search/cs?searchtype=author&query=Quan%2C+P">Pengrui Quan</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+R">Ruijie Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+T">Tianchen Wang</a>, <a href="/search/cs?searchtype=author&query=Abdelzaher%2C+T">Tarek Abdelzaher</a>, <a href="/search/cs?searchtype=author&query=Berg%C3%A9s%2C+M">Mario Bergés</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+P+P">Paul Pu Liang</a>, <a href="/search/cs?searchtype=author&query=Srivastava%2C+M">Mani Srivastava</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.16368v2-abstract-short" style="display: inline;"> Methods from machine learning (ML) have 
transformed the implementation of Perception-Cognition-Communication-Action loops in Cyber-Physical Systems (CPS) and the Internet of Things (IoT), replacing mechanistic and basic statistical models with those derived from data. However, the first generation of ML approaches, which depend on supervised learning with annotated data to create task-specific mod… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.16368v2-abstract-full').style.display = 'inline'; document.getElementById('2501.16368v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.16368v2-abstract-full" style="display: none;"> Methods from machine learning (ML) have transformed the implementation of Perception-Cognition-Communication-Action loops in Cyber-Physical Systems (CPS) and the Internet of Things (IoT), replacing mechanistic and basic statistical models with those derived from data. However, the first generation of ML approaches, which depend on supervised learning with annotated data to create task-specific models, faces significant limitations in scaling to the diverse sensor modalities, deployment configurations, application tasks, and operating dynamics characterizing real-world CPS-IoT systems. The success of task-agnostic foundation models (FMs), including multimodal large language models (LLMs), in addressing similar challenges across natural language, computer vision, and human speech has generated considerable enthusiasm for and exploration of FMs and LLMs as flexible building blocks in CPS-IoT analytics pipelines, promising to reduce the need for costly task-specific engineering. Nonetheless, a significant gap persists between the current capabilities of FMs and LLMs in the CPS-IoT domain and the requirements they must meet to be viable for CPS-IoT applications. 
In this paper, we analyze and characterize this gap through a thorough examination of the state of the art and our research, which extends beyond it in various dimensions. Based on the results of our analysis and research, we identify essential desiderata that CPS-IoT domain-specific FMs and LLMs must satisfy to bridge this gap. We also propose actions by CPS-IoT researchers to collaborate in developing key community resources necessary for establishing FMs and LLMs as foundational tools for the next generation of CPS-IoT systems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.16368v2-abstract-full').style.display = 'none'; document.getElementById('2501.16368v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.15995">arXiv:2501.15995</a> <span> [<a href="https://arxiv.org/pdf/2501.15995">pdf</a>, <a href="https://arxiv.org/format/2501.15995">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Brain-Inspired Decentralized Satellite Learning in Space Computing Power Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yang%2C+P">Peng Yang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+T">Ting Wang</a>, <a href="/search/cs?searchtype=author&query=Cai%2C+H">Haibin Cai</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+Y">Yuanming Shi</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+C">Chunxiao Jiang</a>, <a href="/search/cs?searchtype=author&query=Kuang%2C+L">Linling Kuang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.15995v1-abstract-short" style="display: inline;"> Satellite networks are able to collect massive space information with advanced remote sensing technologies, which is essential for real-time applications such as natural disaster monitoring. However, traditional centralized processing by the ground server incurs a severe timeliness issue caused by the transmission bottleneck of raw data. 
To this end, Space Computing Power Networks (Space-CPN) emer… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15995v1-abstract-full').style.display = 'inline'; document.getElementById('2501.15995v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.15995v1-abstract-full" style="display: none;"> Satellite networks are able to collect massive space information with advanced remote sensing technologies, which is essential for real-time applications such as natural disaster monitoring. However, traditional centralized processing by the ground server incurs a severe timeliness issue caused by the transmission bottleneck of raw data. To this end, Space Computing Power Networks (Space-CPN) emerges as a promising architecture to coordinate the computing capability of satellites and enable on board data processing. Nevertheless, due to the natural limitations of solar panels, satellite power system is difficult to meet the energy requirements for ever-increasing intelligent computation tasks of artificial neural networks. To tackle this issue, we propose to employ spiking neural networks (SNNs), which is supported by the neuromorphic computing architecture, for on-board data processing. The extreme sparsity in its computation enables a high energy efficiency. Furthermore, to achieve effective training of these on-board models, we put forward a decentralized neuromorphic learning framework, where a communication-efficient inter-plane model aggregation method is developed with the inspiration from RelaySum. We provide a theoretical analysis to characterize the convergence behavior of the proposed algorithm, which reveals a network diameter related convergence speed. We then formulate a minimum diameter spanning tree problem on the inter-plane connectivity topology and solve it to further improve the learning performance. 
Extensive experiments are conducted to evaluate the superiority of the proposed method over benchmarks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15995v1-abstract-full').style.display = 'none'; document.getElementById('2501.15995v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.15749">arXiv:2501.15749</a> <span> [<a href="https://arxiv.org/pdf/2501.15749">pdf</a>, <a href="https://arxiv.org/format/2501.15749">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> </div> </div> <p class="title is-5 mathjax"> LLM-powered Multi-agent Framework for Goal-oriented Learning in Intelligent Tutoring System </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+T">Tianfu Wang</a>, <a href="/search/cs?searchtype=author&query=Zhan%2C+Y">Yi Zhan</a>, <a href="/search/cs?searchtype=author&query=Lian%2C+J">Jianxun Lian</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+Z">Zhengyu Hu</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+N+J">Nicholas Jing Yuan</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Q">Qi Zhang</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+X">Xing Xie</a>, <a href="/search/cs?searchtype=author&query=Xiong%2C+H">Hui Xiong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span 
class="abstract-short has-text-grey-dark mathjax" id="2501.15749v1-abstract-short" style="display: inline;"> Intelligent Tutoring Systems (ITSs) have revolutionized education by offering personalized learning experiences. However, as goal-oriented learning, which emphasizes efficiently achieving specific objectives, becomes increasingly important in professional contexts, existing ITSs often struggle to deliver this type of targeted learning experience. In this paper, we propose GenMentor, an LLM-powered… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15749v1-abstract-full').style.display = 'inline'; document.getElementById('2501.15749v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.15749v1-abstract-full" style="display: none;"> Intelligent Tutoring Systems (ITSs) have revolutionized education by offering personalized learning experiences. However, as goal-oriented learning, which emphasizes efficiently achieving specific objectives, becomes increasingly important in professional contexts, existing ITSs often struggle to deliver this type of targeted learning experience. In this paper, we propose GenMentor, an LLM-powered multi-agent framework designed to deliver goal-oriented, personalized learning within ITS. GenMentor begins by accurately mapping learners' goals to required skills using a fine-tuned LLM trained on a custom goal-to-skill dataset. After identifying the skill gap, it schedules an efficient learning path using an evolving optimization approach, driven by a comprehensive and dynamic profile of learners' multifaceted status. Additionally, GenMentor tailors learning content with an exploration-drafting-integration mechanism to align with individual learner needs. Extensive automated and human evaluations demonstrate GenMentor's effectiveness in learning guidance and content quality. 
Furthermore, we have deployed it in practice and also implemented it as an application. Practical human study with professional learners further highlights its effectiveness in goal alignment and resource targeting, leading to enhanced personalization. Supplementary resources are available at https://github.com/GeminiLight/gen-mentor. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15749v1-abstract-full').style.display = 'none'; document.getElementById('2501.15749v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by WWW 2025 (Industry Track)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.15454">arXiv:2501.15454</a> <span> [<a href="https://arxiv.org/pdf/2501.15454">pdf</a>, <a href="https://arxiv.org/format/2501.15454">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> On the Discrimination and Consistency for Exemplar-Free Class Incremental Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+T">Tianqi Wang</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+J">Jingcai Guo</a>, <a href="/search/cs?searchtype=author&query=Li%2C+D">Depeng Li</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Z">Zhi Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.15454v1-abstract-short" style="display: inline;"> Exemplar-free class incremental learning (EF-CIL) is a nontrivial task that requires continuously enriching model capability with new classes while maintaining previously learned knowledge without storing and replaying any old class exemplars. An emerging theory-guided framework for CIL trains task-specific models for a shared network, shifting the pressure of forgetting to task-id prediction. In… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15454v1-abstract-full').style.display = 'inline'; document.getElementById('2501.15454v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.15454v1-abstract-full" style="display: none;"> Exemplar-free class incremental learning (EF-CIL) is a nontrivial task that requires continuously enriching model capability with new classes while maintaining previously learned knowledge without storing and replaying any old class exemplars. An emerging theory-guided framework for CIL trains task-specific models for a shared network, shifting the pressure of forgetting to task-id prediction. In EF-CIL, task-id prediction is more challenging due to the lack of inter-task interaction (e.g., replays of exemplars). To address this issue, we conduct a theoretical analysis of the importance and feasibility of preserving a discriminative and consistent feature space, upon which we propose a novel method termed DCNet. Concretely, it progressively maps class representations into a hyperspherical space, in which different classes are orthogonally distributed to achieve ample inter-class separation. Meanwhile, it also introduces compensatory training to adaptively adjust supervision intensity, thereby aligning the degree of intra-class aggregation. 
Extensive experiments and theoretical analysis verified the superiority of the proposed DCNet. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15454v1-abstract-full').style.display = 'none'; document.getElementById('2501.15454v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages, 4 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.15238">arXiv:2501.15238</a> <span> [<a href="https://arxiv.org/pdf/2501.15238">pdf</a>, <a href="https://arxiv.org/ps/2501.15238">ps</a>, <a href="https://arxiv.org/format/2501.15238">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Logic in Computer Science">cs.LO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> </div> </div> <p class="title is-5 mathjax"> Complete Quantum Relational Hoare Logics from Optimal Transport Duality </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Barthe%2C+G">Gilles Barthe</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+M">Minbo Gao</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+T">Theo Wang</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+L">Li Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.15238v1-abstract-short" 
style="display: inline;"> We introduce a quantitative relational Hoare logic for quantum programs. Assertions of the logic range over a new infinitary extension of positive semidefinite operators. We prove that our logic is sound, and complete for bounded postconditions and almost surely terminating programs. Our completeness result is based on a quantum version of the duality theorem from optimal transport. We also define… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15238v1-abstract-full').style.display = 'inline'; document.getElementById('2501.15238v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.15238v1-abstract-full" style="display: none;"> We introduce a quantitative relational Hoare logic for quantum programs. Assertions of the logic range over a new infinitary extension of positive semidefinite operators. We prove that our logic is sound, and complete for bounded postconditions and almost surely terminating programs. Our completeness result is based on a quantum version of the duality theorem from optimal transport. We also define a complete embedding into our logic of a relational Hoare logic with projective assertions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15238v1-abstract-full').style.display = 'none'; document.getElementById('2501.15238v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.14371">arXiv:2501.14371</a> <span> [<a href="https://arxiv.org/pdf/2501.14371">pdf</a>, <a href="https://arxiv.org/format/2501.14371">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> DRESSing Up LLM: Efficient Stylized Question-Answering via Style Subspace Editing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ma%2C+X">Xinyu Ma</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Y">Yifeng Xu</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+Y">Yang Lin</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+T">Tianlong Wang</a>, <a href="/search/cs?searchtype=author&query=Chu%2C+X">Xu Chu</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+X">Xin Gao</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+J">Junfeng Zhao</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yasha Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.14371v1-abstract-short" style="display: inline;"> We introduce DRESS, a novel approach for generating stylized large language model (LLM) responses through representation editing. Existing methods like prompting and fine-tuning are either insufficient for complex style adaptation or computationally expensive, particularly in tasks like NPC creation or character role-playing. 
Our approach leverages the over-parameterized nature of LLMs to disentan… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14371v1-abstract-full').style.display = 'inline'; document.getElementById('2501.14371v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.14371v1-abstract-full" style="display: none;"> We introduce DRESS, a novel approach for generating stylized large language model (LLM) responses through representation editing. Existing methods like prompting and fine-tuning are either insufficient for complex style adaptation or computationally expensive, particularly in tasks like NPC creation or character role-playing. Our approach leverages the over-parameterized nature of LLMs to disentangle a style-relevant subspace within the model's representation space to conduct representation editing, ensuring a minimal impact on the original semantics. By applying adaptive editing strengths, we dynamically adjust the steering vectors in the style subspace to maintain both stylistic fidelity and semantic integrity. We develop two stylized QA benchmark datasets to validate the effectiveness of DRESS, and the results demonstrate significant improvements compared to baseline methods such as prompting and ITI. In short, DRESS is a lightweight, train-free solution for enhancing LLMs with flexible and effective style control, making it particularly useful for developing stylized conversational agents. Codes and benchmark datasets are available at https://github.com/ArthurLeoM/DRESS-LLM. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14371v1-abstract-full').style.display = 'none'; document.getElementById('2501.14371v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICLR 2025 Accepted</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.14302">arXiv:2501.14302</a> <span> [<a href="https://arxiv.org/pdf/2501.14302">pdf</a>, <a href="https://arxiv.org/format/2501.14302">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> TD-RD: A Top-Down Benchmark with Real-Time Framework for Road Damage Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xiao%2C+X">Xi Xiao</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zhengji Li</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+W">Wentao Wang</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+J">Jiacheng Xie</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+H">Houjie Lin</a>, <a href="/search/cs?searchtype=author&query=Roy%2C+S+K">Swalpa Kumar Roy</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+T">Tianyang Wang</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+M">Min Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short 
has-text-grey-dark mathjax" id="2501.14302v1-abstract-short" style="display: inline;"> Object detection has witnessed remarkable advancements over the past decade, largely driven by breakthroughs in deep learning and the proliferation of large scale datasets. However, the domain of road damage detection remains relatively under explored, despite its critical significance for applications such as infrastructure maintenance and road safety. This paper addresses this gap by introducing… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14302v1-abstract-full').style.display = 'inline'; document.getElementById('2501.14302v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.14302v1-abstract-full" style="display: none;"> Object detection has witnessed remarkable advancements over the past decade, largely driven by breakthroughs in deep learning and the proliferation of large scale datasets. However, the domain of road damage detection remains relatively under explored, despite its critical significance for applications such as infrastructure maintenance and road safety. This paper addresses this gap by introducing a novel top down benchmark that offers a complementary perspective to existing datasets, specifically tailored for road damage detection. Our proposed Top Down Road Damage Detection Dataset (TDRD) includes three primary categories of road damage cracks, potholes, and patches captured from a top down viewpoint. The dataset consists of 7,088 high resolution images, encompassing 12,882 annotated instances of road damage. Additionally, we present a novel real time object detection framework, TDYOLOV10, designed to handle the unique challenges posed by the TDRD dataset. Comparative studies with state of the art models demonstrate competitive baseline results. By releasing TDRD, we aim to accelerate research in this crucial area. 
A sample of the dataset will be made publicly available upon the paper's acceptance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14302v1-abstract-full').style.display = 'none'; document.getElementById('2501.14302v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.14273">arXiv:2501.14273</a> <span> [<a href="https://arxiv.org/pdf/2501.14273">pdf</a>, <a href="https://arxiv.org/format/2501.14273">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> Characteristic-Specific Partial Fine-Tuning for Efficient Emotion and Speaker Adaptation in Codec Language Text-to-Speech Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+T">Tianrui Wang</a>, <a href="/search/cs?searchtype=author&query=Ge%2C+M">Meng Ge</a>, <a href="/search/cs?searchtype=author&query=Gong%2C+C">Cheng Gong</a>, <a href="/search/cs?searchtype=author&query=Qiang%2C+C">Chunyu Qiang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Haoyu Wang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Z">Zikang Huang</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Y">Yu Jiang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xiaobao Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xie Chen</a>, <a 
href="/search/cs?searchtype=author&query=Wang%2C+L">Longbiao Wang</a>, <a href="/search/cs?searchtype=author&query=Dang%2C+J">Jianwu Dang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.14273v1-abstract-short" style="display: inline;"> Recently, emotional speech generation and speaker cloning have garnered significant interest in text-to-speech (TTS). With the open-sourcing of codec language TTS models trained on massive datasets with large-scale parameters, adapting these general pre-trained TTS models to generate speech with specific emotional expressions and target speaker characteristics has become a topic of great attention… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14273v1-abstract-full').style.display = 'inline'; document.getElementById('2501.14273v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.14273v1-abstract-full" style="display: none;"> Recently, emotional speech generation and speaker cloning have garnered significant interest in text-to-speech (TTS). With the open-sourcing of codec language TTS models trained on massive datasets with large-scale parameters, adapting these general pre-trained TTS models to generate speech with specific emotional expressions and target speaker characteristics has become a topic of great attention. Common approaches, such as full and adapter-based fine-tuning, often overlook the specific contributions of model parameters to emotion and speaker control. Treating all parameters uniformly during fine-tuning, especially when the target data has limited content diversity compared to the pre-training corpus, results in slow training speed and an increased risk of catastrophic forgetting. 
To address these challenges, we propose a characteristic-specific partial fine-tuning strategy, short as CSP-FT. First, we use a weighted-sum approach to analyze the contributions of different Transformer layers in a pre-trained codec language TTS model for emotion and speaker control in the generated speech. We then selectively fine-tune the layers with the highest and lowest characteristic-specific contributions to generate speech with target emotional expression and speaker identity. Experimental results demonstrate that our method achieves performance comparable to, or even surpassing, full fine-tuning in generating speech with specific emotional expressions and speaker identities. Additionally, CSP-FT delivers approximately 2x faster training speeds, fine-tunes only around 8% of parameters, and significantly reduces catastrophic forgetting. Furthermore, we show that codec language TTS models perform competitively with self-supervised models in speaker identification and emotion classification tasks, offering valuable insights for developing universal speech processing models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14273v1-abstract-full').style.display = 'none'; document.getElementById('2501.14273v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.14249">arXiv:2501.14249</a> <span> [<a href="https://arxiv.org/pdf/2501.14249">pdf</a>, <a href="https://arxiv.org/format/2501.14249">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Humanity's Last Exam </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Phan%2C+L">Long Phan</a>, <a href="/search/cs?searchtype=author&query=Gatti%2C+A">Alice Gatti</a>, <a href="/search/cs?searchtype=author&query=Han%2C+Z">Ziwen Han</a>, <a href="/search/cs?searchtype=author&query=Li%2C+N">Nathaniel Li</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+J">Josephina Hu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Hugh Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+C+B+C">Chen Bo Calvin Zhang</a>, <a href="/search/cs?searchtype=author&query=Shaaban%2C+M">Mohamed Shaaban</a>, <a href="/search/cs?searchtype=author&query=Ling%2C+J">John Ling</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+S">Sean Shi</a>, <a href="/search/cs?searchtype=author&query=Choi%2C+M">Michael Choi</a>, <a href="/search/cs?searchtype=author&query=Agrawal%2C+A">Anish Agrawal</a>, <a href="/search/cs?searchtype=author&query=Chopra%2C+A">Arnav Chopra</a>, <a href="/search/cs?searchtype=author&query=Khoja%2C+A">Adam Khoja</a>, <a 
href="/search/cs?searchtype=author&query=Kim%2C+R">Ryan Kim</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+R">Richard Ren</a>, <a href="/search/cs?searchtype=author&query=Hausenloy%2C+J">Jason Hausenloy</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+O">Oliver Zhang</a>, <a href="/search/cs?searchtype=author&query=Mazeika%2C+M">Mantas Mazeika</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+T">Tung Nguyen</a>, <a href="/search/cs?searchtype=author&query=Anderson%2C+D">Daron Anderson</a>, <a href="/search/cs?searchtype=author&query=Shah%2C+I+A">Imad Ali Shah</a>, <a href="/search/cs?searchtype=author&query=Doroshenko%2C+M">Mikhail Doroshenko</a>, <a href="/search/cs?searchtype=author&query=Stokes%2C+A+C">Alun Cennyth Stokes</a>, <a href="/search/cs?searchtype=author&query=Mahmood%2C+M">Mobeen Mahmood</a> , et al. (710 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.14249v4-abstract-short" style="display: inline;"> Benchmarks are important tools for tracking the rapid advancements in large language model (LLM) capabilities. However, benchmarks are not keeping pace in difficulty: LLMs now achieve over 90\% accuracy on popular benchmarks like MMLU, limiting informed measurement of state-of-the-art LLM capabilities. In response, we introduce Humanity's Last Exam (HLE), a multi-modal benchmark at the frontier of… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14249v4-abstract-full').style.display = 'inline'; document.getElementById('2501.14249v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.14249v4-abstract-full" style="display: none;"> Benchmarks are important tools for tracking the rapid advancements in large language model (LLM) capabilities. 
However, benchmarks are not keeping pace in difficulty: LLMs now achieve over 90\% accuracy on popular benchmarks like MMLU, limiting informed measurement of state-of-the-art LLM capabilities. In response, we introduce Humanity's Last Exam (HLE), a multi-modal benchmark at the frontier of human knowledge, designed to be the final closed-ended academic benchmark of its kind with broad subject coverage. HLE consists of 3,000 questions across dozens of subjects, including mathematics, humanities, and the natural sciences. HLE is developed globally by subject-matter experts and consists of multiple-choice and short-answer questions suitable for automated grading. Each question has a known solution that is unambiguous and easily verifiable, but cannot be quickly answered via internet retrieval. State-of-the-art LLMs demonstrate low accuracy and calibration on HLE, highlighting a significant gap between current LLM capabilities and the expert human frontier on closed-ended academic questions. To inform research and policymaking upon a clear understanding of model capabilities, we publicly release HLE at https://lastexam.ai. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14249v4-abstract-full').style.display = 'none'; document.getElementById('2501.14249v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">27 pages, 6 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.14050">arXiv:2501.14050</a> <span> [<a href="https://arxiv.org/pdf/2501.14050">pdf</a>, <a href="https://arxiv.org/format/2501.14050">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> GraphRAG under Fire </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liang%2C+J">Jiacheng Liang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yuhui Wang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+C">Changjiang Li</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+R">Rongyi Zhu</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+T">Tanqiu Jiang</a>, <a href="/search/cs?searchtype=author&query=Gong%2C+N">Neil Gong</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+T">Ting Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.14050v1-abstract-short" style="display: inline;"> GraphRAG advances retrieval-augmented generation (RAG) by structuring external knowledge as multi-scale knowledge graphs, enabling language models to integrate both broad context and granular details in their reasoning. 
While GraphRAG has demonstrated success across domains, its security implications remain largely unexplored. To bridge this gap, this work examines GraphRAG's vulnerability to pois… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14050v1-abstract-full').style.display = 'inline'; document.getElementById('2501.14050v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.14050v1-abstract-full" style="display: none;"> GraphRAG advances retrieval-augmented generation (RAG) by structuring external knowledge as multi-scale knowledge graphs, enabling language models to integrate both broad context and granular details in their reasoning. While GraphRAG has demonstrated success across domains, its security implications remain largely unexplored. To bridge this gap, this work examines GraphRAG's vulnerability to poisoning attacks, uncovering an intriguing security paradox: compared to conventional RAG, GraphRAG's graph-based indexing and retrieval enhance resilience against simple poisoning attacks; meanwhile, the same features also create new attack surfaces. We present GRAGPoison, a novel attack that exploits shared relations in the knowledge graph to craft poisoning text capable of compromising multiple queries simultaneously. GRAGPoison employs three key strategies: i) relation injection to introduce false knowledge, ii) relation enhancement to amplify poisoning influence, and iii) narrative generation to embed malicious content within coherent text. Empirical evaluation across diverse datasets and models shows that GRAGPoison substantially outperforms existing attacks in terms of effectiveness (up to 98% success rate) and scalability (using less than 68% poisoning text). We also explore potential defensive measures and their limitations, identifying promising directions for future research. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14050v1-abstract-full').style.display = 'none'; document.getElementById('2501.14050v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.12948">arXiv:2501.12948</a> <span> [<a href="https://arxiv.org/pdf/2501.12948">pdf</a>, <a href="https://arxiv.org/format/2501.12948">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=DeepSeek-AI"> DeepSeek-AI</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+D">Daya Guo</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+D">Dejian Yang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Haowei Zhang</a>, <a href="/search/cs?searchtype=author&query=Song%2C+J">Junxiao Song</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Ruoyu Zhang</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+R">Runxin Xu</a>, <a 
href="/search/cs?searchtype=author&query=Zhu%2C+Q">Qihao Zhu</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+S">Shirong Ma</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+P">Peiyi Wang</a>, <a href="/search/cs?searchtype=author&query=Bi%2C+X">Xiao Bi</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xiaokang Zhang</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+X">Xingkai Yu</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yu Wu</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z+F">Z. F. Wu</a>, <a href="/search/cs?searchtype=author&query=Gou%2C+Z">Zhibin Gou</a>, <a href="/search/cs?searchtype=author&query=Shao%2C+Z">Zhihong Shao</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zhuoshu Li</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+Z">Ziyi Gao</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+A">Aixin Liu</a>, <a href="/search/cs?searchtype=author&query=Xue%2C+B">Bing Xue</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+B">Bingxuan Wang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+B">Bochao Wu</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+B">Bei Feng</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+C">Chengda Lu</a> , et al. (175 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.12948v1-abstract-short" style="display: inline;"> We introduce our first-generation reasoning models, DeepSeek-R1-Zero and DeepSeek-R1. DeepSeek-R1-Zero, a model trained via large-scale reinforcement learning (RL) without supervised fine-tuning (SFT) as a preliminary step, demonstrates remarkable reasoning capabilities. Through RL, DeepSeek-R1-Zero naturally emerges with numerous powerful and intriguing reasoning behaviors. 
However, it encounters… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.12948v1-abstract-full').style.display = 'inline'; document.getElementById('2501.12948v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.12948v1-abstract-full" style="display: none;"> We introduce our first-generation reasoning models, DeepSeek-R1-Zero and DeepSeek-R1. DeepSeek-R1-Zero, a model trained via large-scale reinforcement learning (RL) without supervised fine-tuning (SFT) as a preliminary step, demonstrates remarkable reasoning capabilities. Through RL, DeepSeek-R1-Zero naturally emerges with numerous powerful and intriguing reasoning behaviors. However, it encounters challenges such as poor readability, and language mixing. To address these issues and further enhance reasoning performance, we introduce DeepSeek-R1, which incorporates multi-stage training and cold-start data before RL. DeepSeek-R1 achieves performance comparable to OpenAI-o1-1217 on reasoning tasks. To support the research community, we open-source DeepSeek-R1-Zero, DeepSeek-R1, and six dense models (1.5B, 7B, 8B, 14B, 32B, 70B) distilled from DeepSeek-R1 based on Qwen and Llama. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.12948v1-abstract-full').style.display = 'none'; document.getElementById('2501.12948v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.12617">arXiv:2501.12617</a> <span> [<a href="https://arxiv.org/pdf/2501.12617">pdf</a>, <a href="https://arxiv.org/format/2501.12617">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1007/s10664-024-10592-z">10.1007/s10664-024-10592-z <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Deep Learning-Based Identification of Inconsistent Method Names: How Far Are We? </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+T">Taiming Wang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yuxia Zhang</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+L">Lin Jiang</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+Y">Yi Tang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+G">Guangjie Li</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+H">Hui Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.12617v1-abstract-short" style="display: inline;"> Concise and meaningful method names are crucial for program comprehension and maintenance. However, method names may become inconsistent with their corresponding implementations, causing confusion and errors. 
Several deep learning (DL)-based approaches have been proposed to identify such inconsistencies, with initial evaluations showing promising results. However, these evaluations typically use a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.12617v1-abstract-full').style.display = 'inline'; document.getElementById('2501.12617v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.12617v1-abstract-full" style="display: none;"> Concise and meaningful method names are crucial for program comprehension and maintenance. However, method names may become inconsistent with their corresponding implementations, causing confusion and errors. Several deep learning (DL)-based approaches have been proposed to identify such inconsistencies, with initial evaluations showing promising results. However, these evaluations typically use a balanced dataset, where the number of inconsistent and consistent names are equal. This setup, along with flawed dataset construction, leads to false positives, making reported performance less reliable in real-world scenarios, where most method names are consistent. In this paper, we present an empirical study that evaluates state-of-the-art DL-based methods for identifying inconsistent method names. We create a new benchmark by combining automatic identification from commit histories and manual developer inspections, reducing false positives. We evaluate five representative DL approaches (one retrieval-based and four generation-based) on this benchmark. Our results show that performance drops substantially when moving from the balanced dataset to the new benchmark. We further conduct quantitative and qualitative analyses to understand the strengths and weaknesses of the approaches. Retrieval-based methods perform well on simple methods and those with popular name sub-tokens but fail due to inefficient representation techniques. 
Generation-based methods struggle with inaccurate similarity calculations and immature name generation. Based on these findings, we propose improvements using contrastive learning and large language models (LLMs). Our study suggests that significant improvements are needed before these DL approaches can be effectively applied to real-world software systems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.12617v1-abstract-full').style.display = 'none'; document.getElementById('2501.12617v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Empirical Software Engineering, 2025, 30(1): 31 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.10408">arXiv:2501.10408</a> <span> [<a href="https://arxiv.org/pdf/2501.10408">pdf</a>, <a href="https://arxiv.org/format/2501.10408">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> Leveraging Cross-Attention Transformer and Multi-Feature Fusion for Cross-Linguistic Speech Emotion Recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhao%2C+R">Ruoyu Zhao</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+X">Xiantao Jiang</a>, 
<a href="/search/cs?searchtype=author&query=Yu%2C+F+R">F. Richard Yu</a>, <a href="/search/cs?searchtype=author&query=Leung%2C+V+C+M">Victor C. M. Leung</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+T">Tao Wang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Shaohu Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.10408v1-abstract-short" style="display: inline;"> Speech Emotion Recognition (SER) plays a crucial role in enhancing human-computer interaction. Cross-Linguistic SER (CLSER) has been a challenging research problem due to significant variability in linguistic and acoustic features of different languages. In this study, we propose a novel approach HuMP-CAT, which combines HuBERT, MFCC, and prosodic characteristics. These features are fused using a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.10408v1-abstract-full').style.display = 'inline'; document.getElementById('2501.10408v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.10408v1-abstract-full" style="display: none;"> Speech Emotion Recognition (SER) plays a crucial role in enhancing human-computer interaction. Cross-Linguistic SER (CLSER) has been a challenging research problem due to significant variability in linguistic and acoustic features of different languages. In this study, we propose a novel approach HuMP-CAT, which combines HuBERT, MFCC, and prosodic characteristics. These features are fused using a cross-attention transformer (CAT) mechanism during feature extraction. Transfer learning is applied to gain from a source emotional speech dataset to the target corpus for emotion recognition. 
We use IEMOCAP as the source dataset to train the source model and evaluate the proposed method on seven datasets in five languages (e.g., English, German, Spanish, Italian, and Chinese). We show that, by fine-tuning the source model with a small portion of speech from the target datasets, HuMP-CAT achieves an average accuracy of 78.75% across the seven datasets, with notable performance of 88.69% on EMODB (German language) and 79.48% on EMOVO (Italian language). Our extensive evaluation demonstrates that HuMP-CAT outperforms existing methods across multiple target languages. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.10408v1-abstract-full').style.display = 'none'; document.getElementById('2501.10408v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.09767">arXiv:2501.09767</a> <span> [<a href="https://arxiv.org/pdf/2501.09767">pdf</a>, <a href="https://arxiv.org/format/2501.09767">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> LeMo: Enabling LEss Token Involvement for MOre Context Fine-tuning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+T">Tuowei Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xingyu Chen</a>, <a href="/search/cs?searchtype=author&query=Li%2C+K">Kun Li</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+T">Ting Cao</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+J">Ju Ren</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yaoxue Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.09767v1-abstract-short" style="display: inline;"> The escalating demand for long-context applications has intensified the necessity of extending the LLM context windows. Despite recent fine-tuning approaches successfully expanding context lengths, their high memory footprints, especially for activations, present a critical practical limitation. 
Current parameter-efficient fine-tuning methods prioritize reducing parameter update overhead over addr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.09767v1-abstract-full').style.display = 'inline'; document.getElementById('2501.09767v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.09767v1-abstract-full" style="display: none;"> The escalating demand for long-context applications has intensified the necessity of extending the LLM context windows. Despite recent fine-tuning approaches successfully expanding context lengths, their high memory footprints, especially for activations, present a critical practical limitation. Current parameter-efficient fine-tuning methods prioritize reducing parameter update overhead over addressing activation memory constraints. Similarly, existing sparsity mechanisms improve computational efficiency but overlook activation memory optimization due to the phenomenon of Shadowy Activation. In this paper, we propose LeMo, the first LLM fine-tuning system that explores and exploits a new token-level sparsity mechanism inherent in long-context scenarios, termed Contextual Token Sparsity. LeMo minimizes redundant token involvement by assessing the informativeness of token embeddings while preserving model accuracy. Specifically, LeMo introduces three key techniques: (1) Token Elimination, dynamically identifying and excluding redundant tokens across varying inputs and layers. (2) Pattern Prediction, utilizing well-trained predictors to approximate token sparsity patterns with minimal overhead. (3) Kernel Optimization, employing permutation-free and segment-based strategies to boost system performance. We implement LeMo as an end-to-end fine-tuning system compatible with various LLM architectures and other optimization techniques. 
Comprehensive evaluations demonstrate that LeMo reduces memory consumption by up to 1.93x and achieves up to 1.36x speedups, outperforming state-of-the-art fine-tuning systems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.09767v1-abstract-full').style.display = 'none'; document.getElementById('2501.09767v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.09709">arXiv:2501.09709</a> <span> [<a href="https://arxiv.org/pdf/2501.09709">pdf</a>, <a href="https://arxiv.org/format/2501.09709">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> CyberMentor: AI Powered Learning Tool Platform to Address Diverse Student Needs in Cybersecurity Education </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+T">Tianyu Wang</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+N">Nianjun Zhou</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Z">Zhixiong Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.09709v1-abstract-short" style="display: inline;"> Many non-traditional students in cybersecurity programs often lack access to advice from peers, family members and professors, which can hinder their 
educational experiences. Additionally, these students may not fully benefit from various LLM-powered AI assistants due to issues like content relevance, locality of advice, minimum expertise, and timing. This paper addresses these challenges by intro… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.09709v1-abstract-full').style.display = 'inline'; document.getElementById('2501.09709v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.09709v1-abstract-full" style="display: none;"> Many non-traditional students in cybersecurity programs often lack access to advice from peers, family members and professors, which can hinder their educational experiences. Additionally, these students may not fully benefit from various LLM-powered AI assistants due to issues like content relevance, locality of advice, minimum expertise, and timing. This paper addresses these challenges by introducing an application designed to provide comprehensive support by answering questions related to knowledge, skills, and career preparation advice tailored to the needs of these students. We developed a learning tool platform, CyberMentor, to address the diverse needs and pain points of students majoring in cybersecurity. Powered by agentic workflow and Generative Large Language Models (LLMs), the platform leverages Retrieval-Augmented Generation (RAG) for accurate and contextually relevant information retrieval to achieve accessibility and personalization. We demonstrated its value in addressing knowledge requirements for cybersecurity education and for career marketability, in tackling skill requirements for analytical and programming assignments, and in delivering real time on demand learning support. Using three use scenarios, we showcased CyberMentor in facilitating knowledge acquisition and career preparation and providing seamless skill-based guidance and support. 
We also employed the LangChain prompt-based evaluation methodology to evaluate the platform's impact, confirming its strong performance in helpfulness, correctness, and completeness. These results underscore the system's ability to support students in developing practical cybersecurity skills while improving equity and sustainability within higher education. Furthermore, CyberMentor's open-source design allows for adaptation across other disciplines, fostering educational innovation and broadening its potential impact. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.09709v1-abstract-full').style.display = 'none'; document.getElementById('2501.09709v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 pages, 8 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> K.3.2; I.2.1 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.07917">arXiv:2501.07917</a> <span> [<a href="https://arxiv.org/pdf/2501.07917">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Emerging Technologies">cs.ET</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Applied Physics">physics.app-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optics">physics.optics</span> </div> </div> <p class="title is-5 mathjax"> Roadmap on Neuromorphic Photonics </p> <p class="authors"> <span 
class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Brunner%2C+D">Daniel Brunner</a>, <a href="/search/cs?searchtype=author&query=Shastri%2C+B+J">Bhavin J. Shastri</a>, <a href="/search/cs?searchtype=author&query=Qadasi%2C+M+A+A">Mohammed A. Al Qadasi</a>, <a href="/search/cs?searchtype=author&query=Ballani%2C+H">H. Ballani</a>, <a href="/search/cs?searchtype=author&query=Barbay%2C+S">Sylvain Barbay</a>, <a href="/search/cs?searchtype=author&query=Biasi%2C+S">Stefano Biasi</a>, <a href="/search/cs?searchtype=author&query=Bienstman%2C+P">Peter Bienstman</a>, <a href="/search/cs?searchtype=author&query=Bilodeau%2C+S">Simon Bilodeau</a>, <a href="/search/cs?searchtype=author&query=Bogaerts%2C+W">Wim Bogaerts</a>, <a href="/search/cs?searchtype=author&query=B%C3%B6hm%2C+F">Fabian Böhm</a>, <a href="/search/cs?searchtype=author&query=Brennan%2C+G">G. Brennan</a>, <a href="/search/cs?searchtype=author&query=Buckley%2C+S">Sonia Buckley</a>, <a href="/search/cs?searchtype=author&query=Cai%2C+X">Xinlun Cai</a>, <a href="/search/cs?searchtype=author&query=Strinati%2C+M+C">Marcello Calvanese Strinati</a>, <a href="/search/cs?searchtype=author&query=Canakci%2C+B">B. Canakci</a>, <a href="/search/cs?searchtype=author&query=Charbonnier%2C+B">Benoit Charbonnier</a>, <a href="/search/cs?searchtype=author&query=Chemnitz%2C+M">Mario Chemnitz</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yitong Chen</a>, <a href="/search/cs?searchtype=author&query=Cheung%2C+S">Stanley Cheung</a>, <a href="/search/cs?searchtype=author&query=Chiles%2C+J">Jeff Chiles</a>, <a href="/search/cs?searchtype=author&query=Choi%2C+S">Suyeon Choi</a>, <a href="/search/cs?searchtype=author&query=Christodoulides%2C+D+N">Demetrios N. Christodoulides</a>, <a href="/search/cs?searchtype=author&query=Chrostowski%2C+L">Lukas Chrostowski</a>, <a href="/search/cs?searchtype=author&query=Chu%2C+J">J. Chu</a>, <a href="/search/cs?searchtype=author&query=Clegg%2C+J+H">J. H.
Clegg</a> , et al. (125 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.07917v2-abstract-short" style="display: inline;"> This roadmap consolidates recent advances while exploring emerging applications, reflecting the remarkable diversity of hardware platforms, neuromorphic concepts, and implementation philosophies reported in the field. It emphasizes the critical role of cross-disciplinary collaboration in this rapidly evolving field. </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.07917v2-abstract-full" style="display: none;"> This roadmap consolidates recent advances while exploring emerging applications, reflecting the remarkable diversity of hardware platforms, neuromorphic concepts, and implementation philosophies reported in the field. It emphasizes the critical role of cross-disciplinary collaboration in this rapidly evolving field. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.07917v2-abstract-full').style.display = 'none'; document.getElementById('2501.07917v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
</p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Wang%2C+T&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Wang%2C+T&start=0" class="pagination-link is-current" aria-label="Goto page 1" aria-current="page">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Wang%2C+T&start=50" class="pagination-link " aria-label="Page 2">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Wang%2C+T&start=100" class="pagination-link " aria-label="Page 3">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Wang%2C+T&start=150" class="pagination-link " aria-label="Page 4">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Wang%2C+T&start=200" class="pagination-link " aria-label="Page 5">5 </a> </li> <li><span class="pagination-ellipsis">…</span></li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5
5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" 
href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script 
src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>