Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/>  <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b">  <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\$","\$"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a>  <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div>  <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div>  <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 1,491 results for author: <span class="mathjax">Ma, J</span> </h1> </div> <div class="level-right is-hidden-mobile">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Ma%2C+J">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Ma, J"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Ma%2C+J&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Ma, J"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Ma%2C+J&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Ma%2C+J&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Ma%2C+J&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Ma%2C+J&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Ma%2C+J&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Ma%2C+J&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">…</span></li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09662">arXiv:2502.09662</a> <span> [<a href="https://arxiv.org/pdf/2502.09662">pdf</a>, <a href="https://arxiv.org/format/2502.09662">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Generalizable Cervical Cancer Screening via Large-scale Pretraining and Test-Time Adaptation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jiang%2C+H">Hao Jiang</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+C">Cheng Jin</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+H">Huangjing Lin</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Y">Yanning Zhou</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xi Wang</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+J">Jiabo Ma</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+L">Li Ding</a>, <a href="/search/cs?searchtype=author&query=Hou%2C+J">Jun Hou</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+R">Runsheng Liu</a>, <a href="/search/cs?searchtype=author&query=Chai%2C+Z">Zhizhong Chai</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+L">Luyang Luo</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+H">Huijuan Shi</a>, <a href="/search/cs?searchtype=author&query=Qian%2C+Y">Yinling Qian</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Q">Qiong Wang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+C">Changzhong Li</a>, <a href="/search/cs?searchtype=author&query=Han%2C+A">Anjia Han</a>, <a href="/search/cs?searchtype=author&query=Chan%2C+R+C+K">Ronald Cheong Kin Chan</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+H">Hao Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09662v1-abstract-short" style="display: inline;"> Cervical cancer is a leading malignancy in female reproductive system. While AI-assisted cytology offers a cost-effective and non-invasive screening solution, current systems struggle with generalizability in complex clinical scenarios. To address this issue, we introduced Smart-CCS, a generalizable Cervical Cancer Screening paradigm based on pretraining and adaptation to create robust and general… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09662v1-abstract-full').style.display = 'inline'; document.getElementById('2502.09662v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09662v1-abstract-full" style="display: none;"> Cervical cancer is a leading malignancy in female reproductive system. While AI-assisted cytology offers a cost-effective and non-invasive screening solution, current systems struggle with generalizability in complex clinical scenarios. To address this issue, we introduced Smart-CCS, a generalizable Cervical Cancer Screening paradigm based on pretraining and adaptation to create robust and generalizable screening systems. To develop and validate Smart-CCS, we first curated a large-scale, multi-center dataset named CCS-127K, which comprises a total of 127,471 cervical cytology whole-slide images collected from 48 medical centers. By leveraging large-scale self-supervised pretraining, our CCS models are equipped with strong generalization capability, potentially generalizing across diverse scenarios. Then, we incorporated test-time adaptation to specifically optimize the trained CCS model for complex clinical settings, which adapts and refines predictions, improving real-world applicability. We conducted large-scale system evaluation among various cohorts. In retrospective cohorts, Smart-CCS achieved an overall area under the curve (AUC) value of 0.965 and sensitivity of 0.913 for cancer screening on 11 internal test datasets. In external testing, system performance maintained high at 0.950 AUC across 6 independent test datasets. In prospective cohorts, our Smart-CCS achieved AUCs of 0.947, 0.924, and 0.986 in three prospective centers, respectively. Moreover, the system demonstrated superior sensitivity in diagnosing cervical cancer, confirming the accuracy of our cancer screening results by using histology findings for validation. Interpretability analysis with cell and slide predictions further indicated that the system's decision-making aligns with clinical practice. Smart-CCS represents a significant advancement in cancer screening across diverse clinical contexts. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09662v1-abstract-full').style.display = 'none'; document.getElementById('2502.09662v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.08888">arXiv:2502.08888</a> <span> [<a href="https://arxiv.org/pdf/2502.08888">pdf</a>, <a href="https://arxiv.org/format/2502.08888">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> LLM-Enhanced Multiple Instance Learning for Joint Rumor and Stance Detection with Social Context Information </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yang%2C+R">Ruichao Yang</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+J">Jing Ma</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+W">Wei Gao</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+H">Hongzhan Lin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.08888v1-abstract-short" style="display: inline;"> The proliferation of misinformation, such as rumors on social media, has drawn significant attention, prompting various expressions of stance among users. Although rumor detection and stance detection are distinct tasks, they can complement each other. Rumors can be identified by cross-referencing stances in related posts, and stances are influenced by the nature of the rumor. However, existing st… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08888v1-abstract-full').style.display = 'inline'; document.getElementById('2502.08888v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.08888v1-abstract-full" style="display: none;"> The proliferation of misinformation, such as rumors on social media, has drawn significant attention, prompting various expressions of stance among users. Although rumor detection and stance detection are distinct tasks, they can complement each other. Rumors can be identified by cross-referencing stances in related posts, and stances are influenced by the nature of the rumor. However, existing stance detection methods often require post-level stance annotations, which are costly to obtain. We propose a novel LLM-enhanced MIL approach to jointly predict post stance and claim class labels, supervised solely by claim labels, using an undirected microblog propagation model. Our weakly supervised approach relies only on bag-level labels of claim veracity, aligning with multi-instance learning (MIL) principles. To achieve this, we transform the multi-class problem into multiple MIL-based binary classification problems. We then employ a discriminative attention layer to aggregate the outputs from these classifiers into finer-grained classes. Experiments conducted on three rumor datasets and two stance datasets demonstrate the effectiveness of our approach, highlighting strong connections between rumor veracity and expressed stances in responding posts. Our method shows promising performance in joint rumor and stance detection compared to the state-of-the-art methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08888v1-abstract-full').style.display = 'none'; document.getElementById('2502.08888v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ACM TIST</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.08674">arXiv:2502.08674</a> <span> [<a href="https://arxiv.org/pdf/2502.08674">pdf</a>, <a href="https://arxiv.org/format/2502.08674">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TMM.2022.3185894">10.1109/TMM.2022.3185894 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> COutfitGAN: Learning to Synthesize Compatible Outfits Supervised by Silhouette Masks and Fashion Styles </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhou%2C+D">Dongliang Zhou</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Haijun Zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Q">Qun Li</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+J">Jianghong Ma</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+X">Xiaofei Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.08674v1-abstract-short" style="display: inline;"> How to recommend outfits has gained considerable attention in both academia and industry in recent years. Many studies have been carried out regarding fashion compatibility learning, to determine whether the fashion items in an outfit are compatible or not. These methods mainly focus on evaluating the compatibility of existing outfits and rarely consider applying such knowledge to 'design' new fas… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08674v1-abstract-full').style.display = 'inline'; document.getElementById('2502.08674v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.08674v1-abstract-full" style="display: none;"> How to recommend outfits has gained considerable attention in both academia and industry in recent years. Many studies have been carried out regarding fashion compatibility learning, to determine whether the fashion items in an outfit are compatible or not. These methods mainly focus on evaluating the compatibility of existing outfits and rarely consider applying such knowledge to 'design' new fashion items. We propose the new task of generating complementary and compatible fashion items based on an arbitrary number of given fashion items. In particular, given some fashion items that can make up an outfit, the aim of this paper is to synthesize photo-realistic images of other, complementary, fashion items that are compatible with the given ones. To achieve this, we propose an outfit generation framework, referred to as COutfitGAN, which includes a pyramid style extractor, an outfit generator, a UNet-based real/fake discriminator, and a collocation discriminator. To train and evaluate this framework, we collected a large-scale fashion outfit dataset with over 200K outfits and 800K fashion items from the Internet. Extensive experiments show that COutfitGAN outperforms other baselines in terms of similarity, authenticity, and compatibility measurements. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08674v1-abstract-full').style.display = 'none'; document.getElementById('2502.08674v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper was accepted by IEEE TMM</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.06874">arXiv:2502.06874</a> <span> [<a href="https://arxiv.org/pdf/2502.06874">pdf</a>, <a href="https://arxiv.org/format/2502.06874">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Group Reasoning Emission Estimation Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Guo%2C+Y">Yanming Guo</a>, <a href="/search/cs?searchtype=author&query=Qian%2C+X">Xiao Qian</a>, <a href="/search/cs?searchtype=author&query=Credit%2C+K">Kevin Credit</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+J">Jin Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.06874v1-abstract-short" style="display: inline;"> Accurate greenhouse gas (GHG) emission reporting is critical for governments, businesses, and investors. However, adoption remains limited particularly among small and medium enterprises due to high implementation costs, fragmented emission factor databases, and a lack of robust sector classification methods. To address these challenges, we introduce Group Reasoning Emission Estimation Networks (G… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06874v1-abstract-full').style.display = 'inline'; document.getElementById('2502.06874v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.06874v1-abstract-full" style="display: none;"> Accurate greenhouse gas (GHG) emission reporting is critical for governments, businesses, and investors. However, adoption remains limited particularly among small and medium enterprises due to high implementation costs, fragmented emission factor databases, and a lack of robust sector classification methods. To address these challenges, we introduce Group Reasoning Emission Estimation Networks (GREEN), an AI-driven carbon accounting framework that standardizes enterprise-level emission estimation, constructs a large-scale benchmark dataset, and leverages a novel reasoning approach with large language models (LLMs). Specifically, we compile textual descriptions for 20,850 companies with validated North American Industry Classification System (NAICS) labels and align these with an economic model of carbon intensity factors. By reframing sector classification as an information retrieval task, we fine-tune Sentence-BERT models using a contrastive learning loss. To overcome the limitations of single-stage models in handling thousands of hierarchical categories, we propose a Group Reasoning method that ensembles LLM classifiers based on the natural NAICS ontology, decomposing the task into multiple sub-classification steps. We theoretically prove that this approach reduces classification uncertainty and computational complexity. Experiments on 1,114 NAICS categories yield state-of-the-art performance (83.68% Top-1, 91.47% Top-10 accuracy), and case studies on 20 companies report a mean absolute percentage error (MAPE) of 45.88%. The project is available at: https://huggingface.co/datasets/Yvnminc/ExioNAICS. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06874v1-abstract-full').style.display = 'none'; document.getElementById('2502.06874v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.06359">arXiv:2502.06359</a> <span> [<a href="https://arxiv.org/pdf/2502.06359">pdf</a>, <a href="https://arxiv.org/format/2502.06359">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Occlusion-Aware Contingency Safety-Critical Planning for Autonomous Vehicles </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zheng%2C+L">Lei Zheng</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+R">Rui Yang</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+M">Minzhe Zheng</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+Z">Zengqi Peng</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+M+Y">Michael Yu Wang</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+J">Jun Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.06359v1-abstract-short" style="display: inline;"> Ensuring safe driving while maintaining travel efficiency for autonomous vehicles in dynamic and occluded environments is a critical challenge. This paper proposes an occlusion-aware contingency safety-critical planning approach for real-time autonomous driving in such environments. Leveraging reachability analysis for risk assessment, forward reachable sets of occluded phantom vehicles are comput… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06359v1-abstract-full').style.display = 'inline'; document.getElementById('2502.06359v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.06359v1-abstract-full" style="display: none;"> Ensuring safe driving while maintaining travel efficiency for autonomous vehicles in dynamic and occluded environments is a critical challenge. This paper proposes an occlusion-aware contingency safety-critical planning approach for real-time autonomous driving in such environments. Leveraging reachability analysis for risk assessment, forward reachable sets of occluded phantom vehicles are computed to quantify dynamic velocity boundaries. These velocity boundaries are incorporated into a biconvex nonlinear programming (NLP) formulation, enabling simultaneous optimization of exploration and fallback trajectories within a receding horizon planning framework. To facilitate real-time optimization and ensure coordination between trajectories, we employ the consensus alternating direction method of multipliers (ADMM) to decompose the biconvex NLP problem into low-dimensional convex subproblems. The effectiveness of the proposed approach is validated through simulation studies and real-world experiments in occluded intersections. Experimental results demonstrate enhanced safety and improved travel efficiency, enabling real-time safe trajectory generation in dynamic occluded intersections under varying obstacle conditions. A video showcasing the experimental results is available at https://youtu.be/CHayG7NChqM. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06359v1-abstract-full').style.display = 'none'; document.getElementById('2502.06359v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05330">arXiv:2502.05330</a> <span> [<a href="https://arxiv.org/pdf/2502.05330">pdf</a>, <a href="https://arxiv.org/format/2502.05330">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Multi-Class Segmentation of Aortic Branches and Zones in Computed Tomography Angiography: The AortaSeg24 Challenge </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Imran%2C+M">Muhammad Imran</a>, <a href="/search/cs?searchtype=author&query=Krebs%2C+J+R">Jonathan R. Krebs</a>, <a href="/search/cs?searchtype=author&query=Sivaraman%2C+V+B">Vishal Balaji Sivaraman</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+T">Teng Zhang</a>, <a href="/search/cs?searchtype=author&query=Kumar%2C+A">Amarjeet Kumar</a>, <a href="/search/cs?searchtype=author&query=Ueland%2C+W+R">Walker R. Ueland</a>, <a href="/search/cs?searchtype=author&query=Fassler%2C+M+J">Michael J. Fassler</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+J">Jinlong Huang</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+X">Xiao Sun</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+L">Lisheng Wang</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+P">Pengcheng Shi</a>, <a href="/search/cs?searchtype=author&query=Rokuss%2C+M">Maximilian Rokuss</a>, <a href="/search/cs?searchtype=author&query=Baumgartner%2C+M">Michael Baumgartner</a>, <a href="/search/cs?searchtype=author&query=Kirchhof%2C+Y">Yannick Kirchhof</a>, <a href="/search/cs?searchtype=author&query=Maier-Hein%2C+K+H">Klaus H. Maier-Hein</a>, <a href="/search/cs?searchtype=author&query=Isensee%2C+F">Fabian Isensee</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+S">Shuolin Liu</a>, <a href="/search/cs?searchtype=author&query=Han%2C+B">Bing Han</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+B+T">Bong Thanh Nguyen</a>, <a href="/search/cs?searchtype=author&query=Shin%2C+D">Dong-jin Shin</a>, <a href="/search/cs?searchtype=author&query=Ji-Woo%2C+P">Park Ji-Woo</a>, <a href="/search/cs?searchtype=author&query=Choi%2C+M">Mathew Choi</a>, <a href="/search/cs?searchtype=author&query=Uhm%2C+K">Kwang-Hyun Uhm</a>, <a href="/search/cs?searchtype=author&query=Ko%2C+S">Sung-Jea Ko</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+C">Chanwoong Lee</a> , et al. (38 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05330v1-abstract-short" style="display: inline;"> Multi-class segmentation of the aorta in computed tomography angiography (CTA) scans is essential for diagnosing and planning complex endovascular treatments for patients with aortic dissections. However, existing methods reduce aortic segmentation to a binary problem, limiting their ability to measure diameters across different branches and zones. Furthermore, no open-source dataset is currently… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05330v1-abstract-full').style.display = 'inline'; document.getElementById('2502.05330v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05330v1-abstract-full" style="display: none;"> Multi-class segmentation of the aorta in computed tomography angiography (CTA) scans is essential for diagnosing and planning complex endovascular treatments for patients with aortic dissections. However, existing methods reduce aortic segmentation to a binary problem, limiting their ability to measure diameters across different branches and zones. Furthermore, no open-source dataset is currently available to support the development of multi-class aortic segmentation methods. To address this gap, we organized the AortaSeg24 MICCAI Challenge, introducing the first dataset of 100 CTA volumes annotated for 23 clinically relevant aortic branches and zones. This dataset was designed to facilitate both model development and validation. The challenge attracted 121 teams worldwide, with participants leveraging state-of-the-art frameworks such as nnU-Net and exploring novel techniques, including cascaded models, data augmentation strategies, and custom loss functions. We evaluated the submitted algorithms using the Dice Similarity Coefficient (DSC) and Normalized Surface Distance (NSD), highlighting the approaches adopted by the top five performing teams. This paper presents the challenge design, dataset details, evaluation metrics, and an in-depth analysis of the top-performing algorithms. The annotated dataset, evaluation code, and implementations of the leading methods are publicly available to support further research. All resources can be accessed at https://aortaseg24.grand-challenge.org. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05330v1-abstract-full').style.display = 'none'; document.getElementById('2502.05330v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05130">arXiv:2502.05130</a> <span> [<a href="https://arxiv.org/pdf/2502.05130">pdf</a>, <a href="https://arxiv.org/format/2502.05130">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Latent Swap Joint Diffusion for Long-Form Audio Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Dai%2C+Y">Yusheng Dai</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+C">Chenxi Wang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+C">Chang Li</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+C">Chen Wang</a>, <a href="/search/cs?searchtype=author&query=Du%2C+J">Jun Du</a>, <a href="/search/cs?searchtype=author&query=Li%2C+K">Kewei Li</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+R">Ruoyu Wang</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+J">Jiefeng Ma</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+L">Lei Sun</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+J">Jianqing Gao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05130v1-abstract-short" style="display: inline;"> Previous work on long-form audio generation using global-view diffusion or iterative generation demands significant training or inference costs. While recent advancements in multi-view joint diffusion for panoramic generation provide an efficient option, they struggle with spectrum generation with severe overlap distortions and high cross-view consistency costs. We initially explore this phenomeno… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05130v1-abstract-full').style.display = 'inline'; document.getElementById('2502.05130v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05130v1-abstract-full" style="display: none;"> Previous work on long-form audio generation using global-view diffusion or iterative generation demands significant training or inference costs. While recent advancements in multi-view joint diffusion for panoramic generation provide an efficient option, they struggle with spectrum generation with severe overlap distortions and high cross-view consistency costs. We initially explore this phenomenon through the connectivity inheritance of latent maps and uncover that averaging operations excessively smooth the high-frequency components of the latent map. To address these issues, we propose Swap Forward (SaFa), a frame-level latent swap framework that synchronizes multiple diffusions to produce a globally coherent long audio with more spectrum details in a forward-only manner. At its core, the bidirectional Self-Loop Latent Swap is applied between adjacent views, leveraging stepwise diffusion trajectory to adaptively enhance high-frequency components without disrupting low-frequency components. Furthermore, to ensure cross-view consistency, the unidirectional Reference-Guided Latent Swap is applied between the reference and the non-overlap regions of each subview during the early stages, providing centralized trajectory guidance. Quantitative and qualitative experiments demonstrate that SaFa significantly outperforms existing joint diffusion methods and even training-based long audio generation models. Moreover, we find that it also adapts well to panoramic generation, achieving comparable state-of-the-art performance with greater efficiency and model generalizability. Project page is available at https://swapforward.github.io/. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05130v1-abstract-full').style.display = 'none'; document.getElementById('2502.05130v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.04140">arXiv:2502.04140</a> <span> [<a href="https://arxiv.org/pdf/2502.04140">pdf</a>, <a href="https://arxiv.org/format/2502.04140">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Synthetic Datasets for Machine Learning on Spatio-Temporal Graphs using PDEs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Arndt%2C+J">Jost Arndt</a>, <a href="/search/cs?searchtype=author&query=Isil%2C+U">Utku Isil</a>, <a href="/search/cs?searchtype=author&query=Detzel%2C+M">Michael Detzel</a>, <a href="/search/cs?searchtype=author&query=Samek%2C+W">Wojciech Samek</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+J">Jackie Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.04140v1-abstract-short" style="display: inline;"> Many physical processes can be expressed through partial differential equations (PDEs). Real-world measurements of such processes are often collected at irregularly distributed points in space, which can be effectively represented as graphs; however, there are currently only a few existing datasets. Our work aims to make advancements in the field of PDE-modeling accessible to the temporal graph ma… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04140v1-abstract-full').style.display = 'inline'; document.getElementById('2502.04140v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.04140v1-abstract-full" style="display: none;"> Many physical processes can be expressed through partial differential equations (PDEs). Real-world measurements of such processes are often collected at irregularly distributed points in space, which can be effectively represented as graphs; however, there are currently only a few existing datasets. Our work aims to make advancements in the field of PDE-modeling accessible to the temporal graph machine learning community, while addressing the data scarcity problem, by creating and utilizing datasets based on PDEs. In this work, we create and use synthetic datasets based on PDEs to support spatio-temporal graph modeling in machine learning for different applications. More precisely, we showcase three equations to model different types of disasters and hazards in the fields of epidemiology, atmospheric particles, and tsunami waves. Further, we show how such created datasets can be used by benchmarking several machine learning models on the epidemiological dataset. Additionally, we show how pre-training on this dataset can improve model performance on real-world epidemiological data. The presented methods enable others to create datasets and benchmarks customized to individual requirements. The source code for our methodology and the three created datasets can be found on https://github.com/github-usr-ano/Temporal_Graph_Data_PDEs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04140v1-abstract-full').style.display = 'none'; document.getElementById('2502.04140v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Currently under review</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.03960">arXiv:2502.03960</a> <span> [<a href="https://arxiv.org/pdf/2502.03960">pdf</a>, <a href="https://arxiv.org/format/2502.03960">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Bilevel Multi-Armed Bandit-Based Hierarchical Reinforcement Learning for Interaction-Aware Self-Driving at Unsignalized Intersections </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Peng%2C+Z">Zengqi Peng</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yubin Wang</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+L">Lei Zheng</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+J">Jun Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.03960v1-abstract-short" style="display: inline;"> In this work, we present BiM-ACPPO, a bilevel multi-armed bandit-based hierarchical reinforcement learning framework for interaction-aware decision-making and planning at unsignalized intersections. Essentially, it proactively takes the uncertainties associated with surrounding vehicles (SVs) into consideration, which encompass those stemming from the driver's intention, interactive behaviors, and… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03960v1-abstract-full').style.display = 'inline'; document.getElementById('2502.03960v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.03960v1-abstract-full" style="display: none;"> In this work, we present BiM-ACPPO, a bilevel multi-armed bandit-based hierarchical reinforcement learning framework for interaction-aware decision-making and planning at unsignalized intersections. Essentially, it proactively takes the uncertainties associated with surrounding vehicles (SVs) into consideration, which encompass those stemming from the driver's intention, interactive behaviors, and the varying number of SVs. Intermediate decision variables are introduced to enable the high-level RL policy to provide an interaction-aware reference, for guiding low-level model predictive control (MPC) and further enhancing the generalization ability of the proposed framework. By leveraging the structured nature of self-driving at unsignalized intersections, the training problem of the RL policy is modeled as a bilevel curriculum learning task, which is addressed by the proposed Exp3.S-based BiMAB algorithm. It is noteworthy that the training curricula are dynamically adjusted, thereby facilitating the sample efficiency of the RL training process. Comparative experiments are conducted in the high-fidelity CARLA simulator, and the results indicate that our approach achieves superior performance compared to all baseline methods. Furthermore, experimental results in two new urban driving scenarios clearly demonstrate the commendable generalization performance of the proposed method. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03960v1-abstract-full').style.display = 'none'; document.getElementById('2502.03960v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper has been accepted by IEEE Transactions on Vehicular Technology</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.03748">arXiv:2502.03748</a> <span> [<a href="https://arxiv.org/pdf/2502.03748">pdf</a>, <a href="https://arxiv.org/format/2502.03748">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Rethinking the Residual Distribution of Locate-then-Editing Methods in Model Editing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+X">Xiaopeng Li</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shanwen Wang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+S">Shasha Li</a>, <a href="/search/cs?searchtype=author&query=Song%2C+S">Shezheng Song</a>, <a href="/search/cs?searchtype=author&query=Ji%2C+B">Bin Ji</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+J">Jun Ma</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+J">Jie Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.03748v1-abstract-short" style="display: inline;"> Model editing is a powerful technique for updating the knowledge of Large Language Models (LLMs). Locate-then-edit methods are a popular class of approaches that first identify the critical layers storing knowledge, then compute the residual of the last critical layer based on the edited knowledge, and finally perform multi-layer updates using a least-squares solution by evenly distributing the re… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03748v1-abstract-full').style.display = 'inline'; document.getElementById('2502.03748v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.03748v1-abstract-full" style="display: none;"> Model editing is a powerful technique for updating the knowledge of Large Language Models (LLMs). Locate-then-edit methods are a popular class of approaches that first identify the critical layers storing knowledge, then compute the residual of the last critical layer based on the edited knowledge, and finally perform multi-layer updates using a least-squares solution by evenly distributing the residual from the first critical layer to the last. Although these methods achieve promising results, they have been shown to degrade the original knowledge of LLMs. We argue that residual distribution leads to this issue. To explore this, we conduct a comprehensive analysis of residual distribution in locate-then-edit methods from both empirical and theoretical perspectives, revealing that residual distribution introduces editing errors, leading to inaccurate edits. To address this issue, we propose the Boundary Layer UpdatE (BLUE) strategy to enhance locate-then-edit methods. Sequential batch editing experiments on three LLMs and two datasets demonstrate that BLUE not only delivers an average performance improvement of 35.59\%, significantly advancing the state of the art in model editing, but also enhances the preservation of LLMs' general capabilities. Our code is available at https://github.com/xpq-tech/BLUE. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03748v1-abstract-full').style.display = 'none'; document.getElementById('2502.03748v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Preprint</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.02934">arXiv:2502.02934</a> <span> [<a href="https://arxiv.org/pdf/2502.02934">pdf</a>, <a href="https://arxiv.org/format/2502.02934">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Gait-Net-augmented Implicit Kino-dynamic MPC for Dynamic Variable-frequency Humanoid Locomotion over Discrete Terrains </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+J">Junheng Li</a>, <a href="/search/cs?searchtype=author&query=Duan%2C+Z">Ziwei Duan</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+J">Junchao Ma</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+Q">Quan Nguyen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.02934v1-abstract-short" style="display: inline;"> Current optimization-based control techniques for humanoid locomotion struggle to adapt step duration and placement simultaneously in dynamic walking gaits due to their reliance on fixed-time discretization, which limits responsiveness to terrain conditions and results in suboptimal performance in challenging environments. In this work, we propose a Gait-Net-augmented implicit kino-dynamic model-p… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02934v1-abstract-full').style.display = 'inline'; document.getElementById('2502.02934v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.02934v1-abstract-full" style="display: none;"> Current optimization-based control techniques for humanoid locomotion struggle to adapt step duration and placement simultaneously in dynamic walking gaits due to their reliance on fixed-time discretization, which limits responsiveness to terrain conditions and results in suboptimal performance in challenging environments. In this work, we propose a Gait-Net-augmented implicit kino-dynamic model-predictive control (MPC) to simultaneously optimize step location, step duration, and contact forces for natural variable-frequency locomotion. The proposed method incorporates a Gait-Net-augmented Sequential Convex MPC algorithm to solve multi-linearly constrained variables by iterative quadratic programs. At its core, a lightweight Gait-frequency Network (Gait-Net) determines the preferred step duration in terms of variable MPC sampling times, simplifying step duration optimization to the parameter level. Additionally, it enhances and updates the spatial reference trajectory within each sequential iteration by incorporating local solutions, allowing the projection of kinematic constraints to the design of reference trajectories. We validate the proposed algorithm in high-fidelity simulations and on small-size humanoid hardware, demonstrating its capability for variable-frequency and 3-D discrete terrain locomotion with only a one-step preview of terrain data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02934v1-abstract-full').style.display = 'none'; document.getElementById('2502.02934v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages, 12 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.02673">arXiv:2502.02673</a> <span> [<a href="https://arxiv.org/pdf/2502.02673">pdf</a>, <a href="https://arxiv.org/format/2502.02673">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> </div> </div> <p class="title is-5 mathjax"> MedRAX: Medical Reasoning Agent for Chest X-ray </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Fallahpour%2C+A">Adibvafa Fallahpour</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+J">Jun Ma</a>, <a href="/search/cs?searchtype=author&query=Munim%2C+A">Alif Munim</a>, <a href="/search/cs?searchtype=author&query=Lyu%2C+H">Hongwei Lyu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+B">Bo Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.02673v1-abstract-short" style="display: inline;"> Chest X-rays (CXRs) play an integral role in driving critical decisions in disease management and patient care. While recent innovations have led to specialized models for various CXR interpretation tasks, these solutions often operate in isolation, limiting their practical utility in clinical practice. We present MedRAX, the first versatile AI agent that seamlessly integrates state-of-the-art CXR… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02673v1-abstract-full').style.display = 'inline'; document.getElementById('2502.02673v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.02673v1-abstract-full" style="display: none;"> Chest X-rays (CXRs) play an integral role in driving critical decisions in disease management and patient care. While recent innovations have led to specialized models for various CXR interpretation tasks, these solutions often operate in isolation, limiting their practical utility in clinical practice. We present MedRAX, the first versatile AI agent that seamlessly integrates state-of-the-art CXR analysis tools and multimodal large language models into a unified framework. MedRAX dynamically leverages these models to address complex medical queries without requiring additional training. To rigorously evaluate its capabilities, we introduce ChestAgentBench, a comprehensive benchmark containing 2,500 complex medical queries across 7 diverse categories. Our experiments demonstrate that MedRAX achieves state-of-the-art performance compared to both open-source and proprietary models, representing a significant step toward the practical deployment of automated CXR interpretation systems. Data and code have been publicly available at https://github.com/bowang-lab/MedRAX <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02673v1-abstract-full').style.display = 'none'; document.getElementById('2502.02673v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 pages, 4 figures, 2 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.01080">arXiv:2502.01080</a> <span> [<a href="https://arxiv.org/pdf/2502.01080">pdf</a>, <a href="https://arxiv.org/format/2502.01080">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TCSVT.2023.3318216">10.1109/TCSVT.2023.3318216 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> BC-GAN: A Generative Adversarial Network for Synthesizing a Batch of Collocated Clothing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhou%2C+D">Dongliang Zhou</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Haijun Zhang</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+J">Jianghong Ma</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+J">Jianyang Shi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.01080v1-abstract-short" style="display: inline;"> Collocated clothing synthesis using generative networks has become an emerging topic in the field of fashion intelligence, as it has significant potential economic value to increase revenue in the fashion industry. In previous studies, several works have attempted to synthesize visually-collocated clothing based on a given clothing item using generative adversarial networks (GANs) with promising r… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01080v1-abstract-full').style.display = 'inline'; document.getElementById('2502.01080v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.01080v1-abstract-full" style="display: none;"> Collocated clothing synthesis using generative networks has become an emerging topic in the field of fashion intelligence, as it has significant potential economic value to increase revenue in the fashion industry. In previous studies, several works have attempted to synthesize visually-collocated clothing based on a given clothing item using generative adversarial networks (GANs) with promising results. These works, however, can only accomplish the synthesis of one collocated clothing item each time. Nevertheless, users may require different clothing items to meet their multiple choices due to their personal tastes and different dressing scenarios. To address this limitation, we introduce a novel batch clothing generation framework, named BC-GAN, which is able to synthesize multiple visually-collocated clothing images simultaneously. In particular, to further improve the fashion compatibility of synthetic results, BC-GAN proposes a new fashion compatibility discriminator in a contrastive learning perspective by fully exploiting the collocation relationship among all clothing items. Our model was examined in a large-scale dataset with compatible outfits constructed by ourselves. Extensive experiment results confirmed the effectiveness of our proposed BC-GAN in comparison to state-of-the-art methods in terms of diversity, visual authenticity, and fashion compatibility. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01080v1-abstract-full').style.display = 'none'; document.getElementById('2502.01080v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper was accepted by IEEE TCSVT</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.01055">arXiv:2502.01055</a> <span> [<a href="https://arxiv.org/pdf/2502.01055">pdf</a>, <a href="https://arxiv.org/format/2502.01055">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> On the Surprising Robustness of Sequential Convex Optimization for Contact-Implicit Motion Planning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yulin Li</a>, <a href="/search/cs?searchtype=author&query=Han%2C+H">Haoyu Han</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+S">Shucheng Kang</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+J">Jun Ma</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+H">Heng Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.01055v1-abstract-short" style="display: inline;"> Contact-implicit motion planning-embedding contact sequencing as implicit complementarity constraints-holds the promise of leveraging continuous optimization to discover new contact patterns online. Nevertheless, the resulting optimization, being an instance of Mathematical Programming with Complementary Constraints, fails the classical constraint qualifications that are crucial for the convergenc… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01055v1-abstract-full').style.display = 'inline'; document.getElementById('2502.01055v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.01055v1-abstract-full" style="display: none;"> Contact-implicit motion planning-embedding contact sequencing as implicit complementarity constraints-holds the promise of leveraging continuous optimization to discover new contact patterns online. Nevertheless, the resulting optimization, being an instance of Mathematical Programming with Complementary Constraints, fails the classical constraint qualifications that are crucial for the convergence of popular numerical solvers. We present robust contact-implicit motion planning with sequential convex programming (CRISP), a solver that departs from the usual primal-dual algorithmic framework but instead only focuses on the primal problem. CRISP solves a convex quadratic program with an adaptive trust region radius at each iteration, and its convergence is evaluated by a merit function using weighted penalty. We (i) provide sufficient conditions on CRISP's convergence to first-order stationary points of the merit function; (ii) release a high-performance C++ implementation of CRISP with a generic nonlinear programming interface; and (iii) demonstrate CRISP's surprising robustness in solving contact-implicit planning with naive initialization. In fact, CRISP solves several contact-implicit problems with all-zero initialization. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01055v1-abstract-full').style.display = 'none'; document.getElementById('2502.01055v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.01029">arXiv:2502.01029</a> <span> [<a href="https://arxiv.org/pdf/2502.01029">pdf</a>, <a href="https://arxiv.org/format/2502.01029">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Comprehensive Modeling Approaches for Forecasting Bitcoin Transaction Fees: A Comparative Study </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ma%2C+J">Jiangqin Ma</a>, <a href="/search/cs?searchtype=author&query=Mahmoudinia%2C+E">Erfan Mahmoudinia</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.01029v1-abstract-short" style="display: inline;"> Transaction fee prediction in Bitcoin's ecosystem represents a crucial challenge affecting both user costs and miner revenue optimization. This study presents a systematic evaluation of six predictive models for forecasting Bitcoin transaction fees across a 24-hour horizon (144 blocks): SARIMAX, Prophet, Time2Vec, Time2Vec with Attention, a Hybrid model combining SARIMAX with Gradient Boosting, an… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01029v1-abstract-full').style.display = 'inline'; document.getElementById('2502.01029v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.01029v1-abstract-full" style="display: none;"> Transaction fee prediction in Bitcoin's ecosystem represents a crucial challenge affecting both user costs and miner revenue optimization. This study presents a systematic evaluation of six predictive models for forecasting Bitcoin transaction fees across a 24-hour horizon (144 blocks): SARIMAX, Prophet, Time2Vec, Time2Vec with Attention, a Hybrid model combining SARIMAX with Gradient Boosting, and the Temporal Fusion Transformer (TFT). Our approach integrates comprehensive feature engineering spanning mempool metrics, network parameters, and historical fee patterns to capture the multifaceted dynamics of fee behavior. Through rigorous 5-fold cross-validation and independent testing, our analysis reveals that traditional statistical approaches outperform more complex deep learning architectures. The SARIMAX model achieves superior accuracy on the independent test set, while Prophet demonstrates strong performance during cross-validation. Notably, sophisticated deep learning models like Time2Vec and TFT show comparatively lower predictive power despite their architectural complexity. This performance disparity likely stems from the relatively constrained training dataset of 91 days, suggesting that deep learning models may achieve enhanced results with extended historical data. These findings offer significant practical implications for cryptocurrency stakeholders, providing empirically-validated guidance for fee-sensitive decision making while illuminating critical considerations in model selection based on data constraints. The study establishes a foundation for advanced fee prediction while highlighting the current advantages of traditional statistical methods in this domain. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01029v1-abstract-full').style.display = 'none'; document.getElementById('2502.01029v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00992">arXiv:2502.00992</a> <span> [<a href="https://arxiv.org/pdf/2502.00992">pdf</a>, <a href="https://arxiv.org/format/2502.00992">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3581783.3612036">10.1145/3581783.3612036 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> FCBoost-Net: A Generative Network for Synthesizing Multiple Collocated Outfits via Fashion Compatibility Boosting </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhou%2C+D">Dongliang Zhou</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Haijun Zhang</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+J">Jianghong Ma</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+J">Jicong Fan</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zhao Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.00992v1-abstract-short" style="display: inline;"> Outfit generation is a challenging task in the field of fashion technology, in which the aim is to create a collocated set of fashion items that complement a given set of items. Previous studies in this area have been limited to generating a unique set of fashion items based on a given set of items, without providing additional options to users. This lack of a diverse range of choices necessitates… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00992v1-abstract-full').style.display = 'inline'; document.getElementById('2502.00992v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.00992v1-abstract-full" style="display: none;"> Outfit generation is a challenging task in the field of fashion technology, in which the aim is to create a collocated set of fashion items that complement a given set of items. Previous studies in this area have been limited to generating a unique set of fashion items based on a given set of items, without providing additional options to users. This lack of a diverse range of choices necessitates the development of a more versatile framework. However, when the task of generating collocated and diversified outfits is approached with multimodal image-to-image translation methods, it poses a challenging problem in terms of non-aligned image translation, which is hard to address with existing methods. In this research, we present FCBoost-Net, a new framework for outfit generation that leverages the power of pre-trained generative models to produce multiple collocated and diversified outfits. Initially, FCBoost-Net randomly synthesizes multiple sets of fashion items, and the compatibility of the synthesized sets is then improved in several rounds using a novel fashion compatibility booster. This approach was inspired by boosting algorithms and allows the performance to be gradually improved in multiple steps. Empirical evidence indicates that the proposed strategy can improve the fashion compatibility of randomly synthesized fashion items as well as maintain their diversity. Extensive experiments confirm the effectiveness of our proposed framework with respect to visual authenticity, diversity, and fashion compatibility. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00992v1-abstract-full').style.display = 'none'; document.getElementById('2502.00992v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper has been accepted for presentation at ACM Multimedia 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00581">arXiv:2502.00581</a> <span> [<a href="https://arxiv.org/pdf/2502.00581">pdf</a>, <a href="https://arxiv.org/format/2502.00581">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Trajectory Planning and Control for Differentially Flat Fixed-Wing Aerial Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Morando%2C+L">Luca Morando</a>, <a href="/search/cs?searchtype=author&query=Salunkhe%2C+S+A">Sanket A. Salunkhe</a>, <a href="/search/cs?searchtype=author&query=Bobbili%2C+N">Nishanth Bobbili</a>, <a href="/search/cs?searchtype=author&query=Mao%2C+J">Jeffrey Mao</a>, <a href="/search/cs?searchtype=author&query=Masci%2C+L">Luca Masci</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+H">Hung Nguyen</a>, <a href="/search/cs?searchtype=author&query=de+Souza%2C+C">Cristino de Souza</a>, <a href="/search/cs?searchtype=author&query=Loianno%2C+G">Giuseppe Loianno</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.00581v1-abstract-short" style="display: inline;"> Efficient real-time trajectory planning and control for fixed-wing unmanned aerial vehicles is challenging due to their non-holonomic nature, complex dynamics, and the additional uncertainties introduced by unknown aerodynamic effects. In this paper, we present a fast and efficient real-time trajectory planning and control approach for fixed-wing unmanned aerial vehicles, leveraging the differenti… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00581v1-abstract-full').style.display = 'inline'; document.getElementById('2502.00581v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.00581v1-abstract-full" style="display: none;"> Efficient real-time trajectory planning and control for fixed-wing unmanned aerial vehicles is challenging due to their non-holonomic nature, complex dynamics, and the additional uncertainties introduced by unknown aerodynamic effects. In this paper, we present a fast and efficient real-time trajectory planning and control approach for fixed-wing unmanned aerial vehicles, leveraging the differential flatness property of fixed-wing aircraft in coordinated flight conditions to generate dynamically feasible trajectories. The approach provides the ability to continuously replan trajectories, which we show is useful to dynamically account for the curvature constraint as the aircraft advances along its path. Extensive simulations and real-world experiments validate our approach, showcasing its effectiveness in generating trajectories even in challenging conditions for small FW such as wind disturbances. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00581v1-abstract-full').style.display = 'none'; document.getElementById('2502.00581v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Approved at Icra 25</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Admitted for Publication at 2025 IEEE International Conference on Robotics and Autonomous Systems (ICRA 2025) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.18984">arXiv:2501.18984</a> <span> [<a href="https://arxiv.org/pdf/2501.18984">pdf</a>, <a href="https://arxiv.org/format/2501.18984">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Context Matters: Query-aware Dynamic Long Sequence Modeling of Gigapixel Images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Guo%2C+Z">Zhengrui Guo</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+Q">Qichen Sun</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+J">Jiabo Ma</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+L">Lishuang Feng</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jinzhuo Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+H">Hao Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.18984v1-abstract-short" style="display: inline;"> Whole slide image (WSI) analysis presents significant computational challenges due to the massive number of patches in gigapixel images. While transformer architectures excel at modeling long-range correlations through self-attention, their quadratic computational complexity makes them impractical for computational pathology applications. Existing solutions like local-global or linear self-attenti… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18984v1-abstract-full').style.display = 'inline'; document.getElementById('2501.18984v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.18984v1-abstract-full" style="display: none;"> Whole slide image (WSI) analysis presents significant computational challenges due to the massive number of patches in gigapixel images. While transformer architectures excel at modeling long-range correlations through self-attention, their quadratic computational complexity makes them impractical for computational pathology applications. Existing solutions like local-global or linear self-attention reduce computational costs but compromise the strong modeling capabilities of full self-attention. In this work, we propose Querent, i.e., the query-aware long contextual dynamic modeling framework, which maintains the expressive power of full self-attention while achieving practical efficiency. Our method adaptively predicts which surrounding regions are most relevant for each patch, enabling focused yet unrestricted attention computation only with potentially important contexts. By using efficient region-wise metadata computation and importance estimation, our approach dramatically reduces computational overhead while preserving global perception to model fine-grained patch correlations. Through comprehensive experiments on biomarker prediction, gene mutation prediction, cancer subtyping, and survival analysis across over 10 WSI datasets, our method demonstrates superior performance compared to the state-of-the-art approaches. Code will be made available at https://github.com/dddavid4real/Querent. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18984v1-abstract-full').style.display = 'none'; document.getElementById('2501.18984v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">22 pages, 6 figures, 3 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.18119">arXiv:2501.18119</a> <span> [<a href="https://arxiv.org/pdf/2501.18119">pdf</a>, <a href="https://arxiv.org/format/2501.18119">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Self-supervised Quantized Representation for Seamlessly Integrating Knowledge Graphs with Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lin%2C+Q">Qika Lin</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+T">Tianzhe Zhao</a>, <a href="/search/cs?searchtype=author&query=He%2C+K">Kai He</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+Z">Zhen Peng</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+F">Fangzhi Xu</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+L">Ling Huang</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+J">Jingying Ma</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+M">Mengling Feng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.18119v1-abstract-short" style="display: inline;"> Due to the presence of the natural gap between Knowledge Graph (KG) structures and the natural language, the effective integration of holistic structural information of KGs with Large Language Models (LLMs) has emerged as a significant question. To this end, we propose a two-stage framework to learn and apply quantized codes for each entity, aiming for the seamless integration of KGs with LLMs. Fi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18119v1-abstract-full').style.display = 'inline'; document.getElementById('2501.18119v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.18119v1-abstract-full" style="display: none;"> Due to the presence of the natural gap between Knowledge Graph (KG) structures and the natural language, the effective integration of holistic structural information of KGs with Large Language Models (LLMs) has emerged as a significant question. To this end, we propose a two-stage framework to learn and apply quantized codes for each entity, aiming for the seamless integration of KGs with LLMs. Firstly, a self-supervised quantized representation (SSQR) method is proposed to compress both KG structural and semantic knowledge into discrete codes (\ie, tokens) that align the format of language sentences. We further design KG instruction-following data by viewing these learned codes as features to directly input to LLMs, thereby achieving seamless integration. The experiment results demonstrate that SSQR outperforms existing unsupervised quantized methods, producing more distinguishable codes. Further, the fine-tuned LLaMA2 and LLaMA3.1 also have superior performance on KG link prediction and triple classification tasks, utilizing only 16 tokens per entity instead of thousands in conventional prompting methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18119v1-abstract-full').style.display = 'none'; document.getElementById('2501.18119v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.16656">arXiv:2501.16656</a> <span> [<a href="https://arxiv.org/pdf/2501.16656">pdf</a>, <a href="https://arxiv.org/format/2501.16656">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Data Mining in Transportation Networks with Graph Neural Networks: A Review and Outlook </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xue%2C+J">Jiawei Xue</a>, <a href="/search/cs?searchtype=author&query=Tan%2C+R">Ruichen Tan</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+J">Jianzhu Ma</a>, <a href="/search/cs?searchtype=author&query=Ukkusuri%2C+S+V">Satish V. Ukkusuri</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.16656v1-abstract-short" style="display: inline;"> Data mining in transportation networks (DMTNs) refers to using diverse types of spatio-temporal data for various transportation tasks, including pattern analysis, traffic prediction, and traffic controls. Graph neural networks (GNNs) are essential in many DMTN problems due to their capability to represent spatial correlations between entities. Between 2016 and 2024, the notable applications of GNN… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.16656v1-abstract-full').style.display = 'inline'; document.getElementById('2501.16656v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.16656v1-abstract-full" style="display: none;"> Data mining in transportation networks (DMTNs) refers to using diverse types of spatio-temporal data for various transportation tasks, including pattern analysis, traffic prediction, and traffic controls. Graph neural networks (GNNs) are essential in many DMTN problems due to their capability to represent spatial correlations between entities. Between 2016 and 2024, the notable applications of GNNs in DMTNs have extended to multiple fields such as traffic prediction and operation. However, existing reviews have primarily focused on traffic prediction tasks. To fill this gap, this study provides a timely and insightful summary of GNNs in DMTNs, highlighting new progress in prediction and operation from academic and industry perspectives since 2023. First, we present and analyze various DMTN problems, followed by classical and recent GNN models. Second, we delve into key works in three areas: (1) traffic prediction, (2) traffic operation, and (3) industry involvement, such as Google Maps, Amap, and Baidu Maps. Along these directions, we discuss new research opportunities based on the significance of transportation problems and data availability. Finally, we compile resources such as data, code, and other learning materials to foster interdisciplinary communication. This review, driven by recent trends in GNNs in DMTN studies since 2023, could democratize abundant datasets and efficient GNN methods for various transportation problems including prediction and operation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.16656v1-abstract-full').style.display = 'none'; document.getElementById('2501.16656v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">41 pages, 6 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.16411">arXiv:2501.16411</a> <span> [<a href="https://arxiv.org/pdf/2501.16411">pdf</a>, <a href="https://arxiv.org/format/2501.16411">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> PhysBench: Benchmarking and Enhancing Vision-Language Models for Physical World Understanding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chow%2C+W">Wei Chow</a>, <a href="/search/cs?searchtype=author&query=Mao%2C+J">Jiageng Mao</a>, <a href="/search/cs?searchtype=author&query=Li%2C+B">Boyi Li</a>, <a href="/search/cs?searchtype=author&query=Seita%2C+D">Daniel Seita</a>, <a href="/search/cs?searchtype=author&query=Guizilini%2C+V">Vitor Guizilini</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yue Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.16411v2-abstract-short" style="display: inline;"> Understanding the physical world is a fundamental challenge in embodied AI, critical for enabling agents to perform complex tasks and operate safely in real-world environments. While Vision-Language Models (VLMs) have shown great promise in reasoning and task planning for embodied agents, their ability to comprehend physical phenomena remains extremely limited. To close this gap, we introduce Phys… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.16411v2-abstract-full').style.display = 'inline'; document.getElementById('2501.16411v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.16411v2-abstract-full" style="display: none;"> Understanding the physical world is a fundamental challenge in embodied AI, critical for enabling agents to perform complex tasks and operate safely in real-world environments. While Vision-Language Models (VLMs) have shown great promise in reasoning and task planning for embodied agents, their ability to comprehend physical phenomena remains extremely limited. To close this gap, we introduce PhysBench, a comprehensive benchmark designed to evaluate VLMs' physical world understanding capability across a diverse set of tasks. PhysBench contains 10,002 entries of interleaved video-image-text data, categorized into four major domains: physical object properties, physical object relationships, physical scene understanding, and physics-based dynamics, further divided into 19 subclasses and 8 distinct capability dimensions. Our extensive experiments, conducted on 75 representative VLMs, reveal that while these models excel in common-sense reasoning, they struggle with understanding the physical world -- likely due to the absence of physical knowledge in their training data and the lack of embedded physical priors. To tackle the shortfall, we introduce PhysAgent, a novel framework that combines the generalization strengths of VLMs with the specialized expertise of vision models, significantly enhancing VLMs' physical understanding across a variety of tasks, including an 18.4\% improvement on GPT-4o. Furthermore, our results demonstrate that enhancing VLMs' physical world understanding capabilities can help embodied agents such as MOKA. We believe that PhysBench and PhysAgent offer valuable insights and contribute to bridging the gap between VLMs and physical world understanding. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.16411v2-abstract-full').style.display = 'none'; document.getElementById('2501.16411v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICLR 2025. Project page: https://physbench.github.io/ Dataset: https://huggingface.co/datasets/USC-GVL/PhysBench</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.16388">arXiv:2501.16388</a> <span> [<a href="https://arxiv.org/pdf/2501.16388">pdf</a>, <a href="https://arxiv.org/format/2501.16388">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Applications">stat.AP</span> </div> </div> <p class="title is-5 mathjax"> Development and Validation of a Dynamic Kidney Failure Prediction Model based on Deep Learning: A Real-World Study with External Validation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ma%2C+J">Jingying Ma</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jinwei Wang</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+L">Lanlan Lu</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+Y">Yexiang Sun</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+M">Mengling Feng</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+P">Peng Shen</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Z">Zhiqin Jiang</a>, <a href="/search/cs?searchtype=author&query=Hong%2C+S">Shenda Hong</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Luxia Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.16388v1-abstract-short" style="display: inline;"> Background: Chronic kidney disease (CKD), a progressive disease with high morbidity and mortality, has become a significant global public health problem. At present, most of the models used for predicting the progression of CKD are static models. We aim to develop a dynamic kidney failure prediction model based on deep learning (KFDeep) for CKD patients, utilizing all available data on common clin… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.16388v1-abstract-full').style.display = 'inline'; document.getElementById('2501.16388v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.16388v1-abstract-full" style="display: none;"> Background: Chronic kidney disease (CKD), a progressive disease with high morbidity and mortality, has become a significant global public health problem. At present, most of the models used for predicting the progression of CKD are static models. We aim to develop a dynamic kidney failure prediction model based on deep learning (KFDeep) for CKD patients, utilizing all available data on common clinical indicators from real-world Electronic Health Records (EHRs) to provide real-time predictions. Findings: A retrospective cohort of 4,587 patients from EHRs of Yinzhou, China, is used as the development dataset (2,752 patients for training, 917 patients for validation) and internal validation dataset (917 patients), while a prospective cohort of 934 patients from the Peking University First Hospital CKD cohort (PKUFH cohort) is used as the external validation dataset. The AUROC of the KFDeep model reaches 0.946 (95\% CI: 0.922-0.970) on the internal validation dataset and 0.805 (95\% CI: 0.763-0.847) on the external validation dataset, both surpassing existing models. The KFDeep model demonstrates stable performance in simulated dynamic scenarios, with the AUROC progressively increasing over time. Both the calibration curve and decision curve analyses confirm that the model is unbiased and safe for practical use, while the SHAP analysis and hidden layer clustering results align with established medical knowledge. Interpretation: The KFDeep model built from real-world EHRs enhances the prediction accuracy of kidney failure without increasing clinical examination costs and can be easily integrated into existing hospital systems, providing physicians with a continuously updated decision-support tool due to its dynamic design. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.16388v1-abstract-full').style.display = 'none'; document.getElementById('2501.16388v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.15755">arXiv:2501.15755</a> <span> [<a href="https://arxiv.org/pdf/2501.15755">pdf</a>, <a href="https://arxiv.org/format/2501.15755">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> GraphICL: Unlocking Graph Learning Potential in LLMs through Structured Prompt Design </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sun%2C+Y">Yuanfu Sun</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+Z">Zhengnan Ma</a>, <a href="/search/cs?searchtype=author&query=Fang%2C+Y">Yi Fang</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+J">Jing Ma</a>, <a href="/search/cs?searchtype=author&query=Tan%2C+Q">Qiaoyu Tan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.15755v1-abstract-short" style="display: inline;"> The growing importance of textual and relational systems has driven interest in enhancing large language models (LLMs) for graph-structured data, particularly Text-Attributed Graphs (TAGs), where samples are represented by textual descriptions interconnected by edges. While research has largely focused on developing specialized graph LLMs through task-specific instruction tuning, a comprehensive b… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15755v1-abstract-full').style.display = 'inline'; document.getElementById('2501.15755v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.15755v1-abstract-full" style="display: none;"> The growing importance of textual and relational systems has driven interest in enhancing large language models (LLMs) for graph-structured data, particularly Text-Attributed Graphs (TAGs), where samples are represented by textual descriptions interconnected by edges. While research has largely focused on developing specialized graph LLMs through task-specific instruction tuning, a comprehensive benchmark for evaluating LLMs solely through prompt design remains surprisingly absent. Without such a carefully crafted evaluation benchmark, most if not all, tailored graph LLMs are compared against general LLMs using simplistic queries (e.g., zero-shot reasoning with LLaMA), which can potentially camouflage many advantages as well as unexpected predicaments of them. To achieve more general evaluations and unveil the true potential of LLMs for graph tasks, we introduce Graph In-context Learning (GraphICL) Benchmark, a comprehensive benchmark comprising novel prompt templates designed to capture graph structure and handle limited label knowledge. Our systematic evaluation shows that general-purpose LLMs equipped with our GraphICL outperform state-of-the-art specialized graph LLMs and graph neural network models in resource-constrained settings and out-of-domain tasks. These findings highlight the significant potential of prompt engineering to enhance LLM performance on graph learning tasks without training and offer a strong baseline for advancing research in graph LLMs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15755v1-abstract-full').style.display = 'none'; document.getElementById('2501.15755v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.15228">arXiv:2501.15228</a> <span> [<a href="https://arxiv.org/pdf/2501.15228">pdf</a>, <a href="https://arxiv.org/format/2501.15228">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Improving Retrieval-Augmented Generation through Multi-Agent Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yiqun Chen</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+L">Lingyong Yan</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+W">Weiwei Sun</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+X">Xinyu Ma</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yi Zhang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shuaiqiang Wang</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+D">Dawei Yin</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Y">Yiming Yang</a>, <a href="/search/cs?searchtype=author&query=Mao%2C+J">Jiaxin Mao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.15228v1-abstract-short" style="display: inline;"> Retrieval-augmented generation (RAG) is extensively utilized to incorporate external, current knowledge into large language models, thereby minimizing hallucinations. A standard RAG pipeline may comprise several components, such as query rewriting, document retrieval, document filtering, and answer generation. However, these components are typically optimized separately through supervised fine-tun… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15228v1-abstract-full').style.display = 'inline'; document.getElementById('2501.15228v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.15228v1-abstract-full" style="display: none;"> Retrieval-augmented generation (RAG) is extensively utilized to incorporate external, current knowledge into large language models, thereby minimizing hallucinations. A standard RAG pipeline may comprise several components, such as query rewriting, document retrieval, document filtering, and answer generation. However, these components are typically optimized separately through supervised fine-tuning, which can lead to misalignments between the objectives of individual modules and the overarching aim of generating accurate answers in question-answering (QA) tasks. Although recent efforts have explored reinforcement learning (RL) to optimize specific RAG components, these approaches often focus on overly simplistic pipelines with only two components or do not adequately address the complex interdependencies and collaborative interactions among the modules. To overcome these challenges, we propose treating the RAG pipeline as a multi-agent cooperative task, with each component regarded as an RL agent. Specifically, we present MMOA-RAG, a Multi-Module joint Optimization Algorithm for RAG, which employs multi-agent reinforcement learning to harmonize all agents' goals towards a unified reward, such as the F1 score of the final answer. Experiments conducted on various QA datasets demonstrate that MMOA-RAG improves the overall pipeline performance and outperforms existing baselines. Furthermore, comprehensive ablation studies validate the contributions of individual components and the adaptability of MMOA-RAG across different RAG components and datasets. The code of MMOA-RAG is on https://github.com/chenyiqun/MMOA-RAG. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15228v1-abstract-full').style.display = 'none'; document.getElementById('2501.15228v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.15055">arXiv:2501.15055</a> <span> [<a href="https://arxiv.org/pdf/2501.15055">pdf</a>, <a href="https://arxiv.org/format/2501.15055">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Group Ligands Docking to Protein Pockets </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Guan%2C+J">Jiaqi Guan</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Jiahan Li</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+X">Xiangxin Zhou</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+X">Xingang Peng</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Sheng Wang</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+Y">Yunan Luo</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+J">Jian Peng</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+J">Jianzhu Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.15055v1-abstract-short" style="display: inline;"> Molecular docking is a key task in computational biology that has attracted increasing interest from the machine learning community. While existing methods have achieved success, they generally treat each protein-ligand pair in isolation. Inspired by the biochemical observation that ligands binding to the same target protein tend to adopt similar poses, we propose \textsc{GroupBind}, a novel molec… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15055v1-abstract-full').style.display = 'inline'; document.getElementById('2501.15055v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.15055v1-abstract-full" style="display: none;"> Molecular docking is a key task in computational biology that has attracted increasing interest from the machine learning community. While existing methods have achieved success, they generally treat each protein-ligand pair in isolation. Inspired by the biochemical observation that ligands binding to the same target protein tend to adopt similar poses, we propose \textsc{GroupBind}, a novel molecular docking framework that simultaneously considers multiple ligands docking to a protein. This is achieved by introducing an interaction layer for the group of ligands and a triangle attention module for embedding protein-ligand and group-ligand pairs. By integrating our approach with diffusion-based docking model, we set a new S performance on the PDBBind blind docking benchmark, demonstrating the effectiveness of our proposed molecular docking paradigm. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15055v1-abstract-full').style.display = 'none'; document.getElementById('2501.15055v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">18 pages, published in ICLR 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.14760">arXiv:2501.14760</a> <span> [<a href="https://arxiv.org/pdf/2501.14760">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> AI Meets Natural Hazard Risk: A Nationwide Vulnerability Assessment of Data Centers to Natural Hazards and Power Outages </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Esparza%2C+M">Miguel Esparza</a>, <a href="/search/cs?searchtype=author&query=Li%2C+B">Bo Li</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+J">Junwei Ma</a>, <a href="/search/cs?searchtype=author&query=Mostafavi%2C+A">Ali Mostafavi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.14760v1-abstract-short" style="display: inline;"> Our society is on the verge of a revolution powered by Artificial Intelligence (AI) technologies. With increasing advancements in AI, there is a growing expansion in data centers (DCs) serving as critical infrastructure for this new wave of technologies. This technological wave is also on a collision course with exacerbating climate hazards which raises the need for evaluating the vulnerability of… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14760v1-abstract-full').style.display = 'inline'; document.getElementById('2501.14760v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.14760v1-abstract-full" style="display: none;"> Our society is on the verge of a revolution powered by Artificial Intelligence (AI) technologies. With increasing advancements in AI, there is a growing expansion in data centers (DCs) serving as critical infrastructure for this new wave of technologies. This technological wave is also on a collision course with exacerbating climate hazards which raises the need for evaluating the vulnerability of DCs to various hazards. Hence, the objective of this research is to conduct a nationwide vulnerability assessment of (DCs) in the United States of America (USA). DCs provide such support; however, if an unplanned disruption (like a natural hazard or power outage) occurs, the functionality of DCs are in jeopardy. Unplanned downtime in DCs cause severe economic and social repercussions. With the Local Indicator of Spatial Association (LISA) test, the research found that there are a large percentage of DCs that are in non-vulnerable areas of disruption; however, there is still a notable percentage in disruption prone areas. For example, earthquakes, hurricanes, and tornadoes have the most DCs in vulnerable areas. After identifying these vulnerabilities, the research identified areas within the USA that have minimal vulnerabilities to both the aforementioned natural hazards and power outages with the BI-LISA test. After doing a composite vulnerability score on the Cold-Spots from the BILISA analysis, the research found three counties with the low vulnerability scores. These are Koochiching, Minnesota (0.091), Schoolcraft, Michigan (0.095), and Houghton, Michigan (0.096). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14760v1-abstract-full').style.display = 'none'; document.getElementById('2501.14760v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.14351">arXiv:2501.14351</a> <span> [<a href="https://arxiv.org/pdf/2501.14351">pdf</a>, <a href="https://arxiv.org/format/2501.14351">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Geophysics">physics.geo-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Applications">stat.AP</span> </div> </div> <p class="title is-5 mathjax"> Facies Classification with Copula Entropy </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ma%2C+J">Jian Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.14351v1-abstract-short" style="display: inline;"> In this paper we propose to apply copula entropy (CE) to facies classification. In our method, the correlations between geological variables and facies classes are measured with CE and then the variables associated with large negative CEs are selected for classification. We verified the proposed method on a typical facies dataset for facies classification and the experimental results show that the… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14351v1-abstract-full').style.display = 'inline'; document.getElementById('2501.14351v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.14351v1-abstract-full" style="display: none;"> In this paper we propose to apply copula entropy (CE) to facies classification. In our method, the correlations between geological variables and facies classes are measured with CE and then the variables associated with large negative CEs are selected for classification. We verified the proposed method on a typical facies dataset for facies classification and the experimental results show that the proposed method can select less geological variables for facies classification without sacrificing classification performance. The geological variables such selected are also interpretable to geologists with geological meanings due to the rigorous definition of CE. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14351v1-abstract-full').style.display = 'none'; document.getElementById('2501.14351v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 5 figures, 3 tables. arXiv admin note: text overlap with arXiv:2310.16633</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.14216">arXiv:2501.14216</a> <span> [<a href="https://arxiv.org/pdf/2501.14216">pdf</a>, <a href="https://arxiv.org/format/2501.14216">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> </div> </div> <p class="title is-5 mathjax"> TFG-Flow: Training-free Guidance in Multimodal Generative Flow </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lin%2C+H">Haowei Lin</a>, <a href="/search/cs?searchtype=author&query=Li%2C+S">Shanda Li</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+H">Haotian Ye</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Y">Yiming Yang</a>, <a href="/search/cs?searchtype=author&query=Ermon%2C+S">Stefano Ermon</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+Y">Yitao Liang</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+J">Jianzhu Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.14216v1-abstract-short" style="display: inline;"> Given an unconditional generative model and a predictor for a target property (e.g., a classifier), the goal of training-free guidance is to generate samples with desirable target properties without additional training. As a highly efficient technique for steering generative models toward flexible outcomes, training-free guidance has gained increasing attention in diffusion models. However, existi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14216v1-abstract-full').style.display = 'inline'; document.getElementById('2501.14216v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.14216v1-abstract-full" style="display: none;"> Given an unconditional generative model and a predictor for a target property (e.g., a classifier), the goal of training-free guidance is to generate samples with desirable target properties without additional training. As a highly efficient technique for steering generative models toward flexible outcomes, training-free guidance has gained increasing attention in diffusion models. However, existing methods only handle data in continuous spaces, while many scientific applications involve both continuous and discrete data (referred to as multimodality). Another emerging trend is the growing use of the simple and general flow matching framework in building generative foundation models, where guided generation remains under-explored. To address this, we introduce TFG-Flow, a novel training-free guidance method for multimodal generative flow. TFG-Flow addresses the curse-of-dimensionality while maintaining the property of unbiased sampling in guiding discrete variables. We validate TFG-Flow on four molecular design tasks and show that TFG-Flow has great potential in drug design by generating molecules with desired properties. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14216v1-abstract-full').style.display = 'none'; document.getElementById('2501.14216v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> ICLR 2025 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.13669">arXiv:2501.13669</a> <span> [<a href="https://arxiv.org/pdf/2501.13669">pdf</a>, <a href="https://arxiv.org/format/2501.13669">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> How to Complete Domain Tuning while Keeping General Ability in LLM: Adaptive Layer-wise and Element-wise Regularization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Song%2C+S">Shezheng Song</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+H">Hao Xu</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+J">Jun Ma</a>, <a href="/search/cs?searchtype=author&query=Li%2C+S">Shasha Li</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+L">Long Peng</a>, <a href="/search/cs?searchtype=author&query=Wan%2C+Q">Qian Wan</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xiaodong Liu</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+J">Jie Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.13669v1-abstract-short" style="display: inline;"> Large Language Models (LLMs) exhibit strong general-purpose language capabilities. However, fine-tuning these models on domain-specific tasks often leads to catastrophic forgetting, where the model overwrites or loses essential knowledge acquired during pretraining. This phenomenon significantly limits the broader applicability of LLMs. To address this challenge, we propose a novel approach to com… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.13669v1-abstract-full').style.display = 'inline'; document.getElementById('2501.13669v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.13669v1-abstract-full" style="display: none;"> Large Language Models (LLMs) exhibit strong general-purpose language capabilities. However, fine-tuning these models on domain-specific tasks often leads to catastrophic forgetting, where the model overwrites or loses essential knowledge acquired during pretraining. This phenomenon significantly limits the broader applicability of LLMs. To address this challenge, we propose a novel approach to compute the element-wise importance of model parameters crucial for preserving general knowledge during fine-tuning. Our method utilizes a dual-objective optimization strategy: (1) regularization loss to retain the parameter crucial for general knowledge; (2) cross-entropy loss to adapt to domain-specific tasks. Additionally, we introduce layer-wise coefficients to account for the varying contributions of different layers, dynamically balancing the dual-objective optimization. Extensive experiments on scientific, medical, and physical tasks using GPT-J and LLaMA-3 demonstrate that our approach mitigates catastrophic forgetting while enhancing model adaptability. Compared to previous methods, our solution is approximately 20 times faster and requires only 10%-15% of the storage, highlighting the practical efficiency. The code will be released. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.13669v1-abstract-full').style.display = 'none'; document.getElementById('2501.13669v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Work in progress</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.13570">arXiv:2501.13570</a> <span> [<a href="https://arxiv.org/pdf/2501.13570">pdf</a>, <a href="https://arxiv.org/format/2501.13570">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> Occamy: A Preemptive Buffer Management for On-chip Shared-memory Switches </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shan%2C+D">Danfeng Shan</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yunguang Li</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+J">Jinchao Ma</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zhenxing Zhang</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+Z">Zeyu Liang</a>, <a href="/search/cs?searchtype=author&query=Wen%2C+X">Xinyu Wen</a>, <a href="/search/cs?searchtype=author&query=Li%2C+H">Hao Li</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+W">Wanchun Jiang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+N">Nan Li</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+F">Fengyuan Ren</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.13570v1-abstract-short" style="display: inline;"> Today's high-speed switches employ an on-chip shared packet buffer. The buffer is becoming increasingly insufficient as it cannot scale with the growing switching capacity. Nonetheless, the buffer needs to face highly intense bursts and meet stringent performance requirements for datacenter applications. This imposes rigorous demand on the Buffer Management (BM) scheme, which dynamically allocates… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.13570v1-abstract-full').style.display = 'inline'; document.getElementById('2501.13570v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.13570v1-abstract-full" style="display: none;"> Today's high-speed switches employ an on-chip shared packet buffer. The buffer is becoming increasingly insufficient as it cannot scale with the growing switching capacity. Nonetheless, the buffer needs to face highly intense bursts and meet stringent performance requirements for datacenter applications. This imposes rigorous demand on the Buffer Management (BM) scheme, which dynamically allocates the buffer across queues. However, the de facto BM scheme, designed over two decades ago, is ill-suited to meet the requirements of today's network. In this paper, we argue that shallow-buffer switches, intense bursts, along with dynamic traffic call for a highly agile BM that can quickly adjust the buffer allocation as traffic changes. However, the agility of the current BM is fundamentally limited by its non-preemptive nature. Nonetheless, we find that preemptive BM, considered unrealizable in history, is now feasible on modern switch chips. We propose Occamy, a preemptive BM that can quickly adjust buffer allocation. Occamy utilizes the redundant memory bandwidth to actively reclaim and reallocate the over-allocated buffer. Testbed experiments and large-scale simulations show that Occamy can improve the end-to-end performance by up to ~55%. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.13570v1-abstract-full').style.display = 'none'; document.getElementById('2501.13570v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">18 pages, 23 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.13396">arXiv:2501.13396</a> <span> [<a href="https://arxiv.org/pdf/2501.13396">pdf</a>, <a href="https://arxiv.org/format/2501.13396">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/ICASSP48485.2024.10446336">10.1109/ICASSP48485.2024.10446336 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Towards Intelligent Design: A Self-driven Framework for Collocated Clothing Synthesis Leveraging Fashion Styles and Textures </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Dong%2C+M">Minglong Dong</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+D">Dongliang Zhou</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+J">Jianghong Ma</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Haijun Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.13396v1-abstract-short" style="display: inline;"> Collocated clothing synthesis (CCS) has emerged as a pivotal topic in fashion technology, primarily concerned with the generation of a clothing item that harmoniously matches a given item. However, previous investigations have relied on using paired outfits, such as a pair of matching upper and lower clothing, to train a generative model for achieving this task. This reliance on the expertise of f… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.13396v1-abstract-full').style.display = 'inline'; document.getElementById('2501.13396v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.13396v1-abstract-full" style="display: none;"> Collocated clothing synthesis (CCS) has emerged as a pivotal topic in fashion technology, primarily concerned with the generation of a clothing item that harmoniously matches a given item. However, previous investigations have relied on using paired outfits, such as a pair of matching upper and lower clothing, to train a generative model for achieving this task. This reliance on the expertise of fashion professionals in the construction of such paired outfits has engendered a laborious and time-intensive process. In this paper, we introduce a new self-driven framework, named style- and texture-guided generative network (ST-Net), to synthesize collocated clothing without the necessity for paired outfits, leveraging self-supervised learning. ST-Net is designed to extrapolate fashion compatibility rules from the style and texture attributes of clothing, using a generative adversarial network. To facilitate the training and evaluation of our model, we have constructed a large-scale dataset specifically tailored for unsupervised CCS. Extensive experiments substantiate that our proposed method outperforms the state-of-the-art baselines in terms of both visual authenticity and fashion compatibility. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.13396v1-abstract-full').style.display = 'none'; document.getElementById('2501.13396v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper has been accepted for presentation at ICASSP 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.12147">arXiv:2501.12147</a> <span> [<a href="https://arxiv.org/pdf/2501.12147">pdf</a>, <a href="https://arxiv.org/format/2501.12147">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Improving Influence-based Instruction Tuning Data Selection for Balanced Learning of Diverse Capabilities </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Dai%2C+Q">Qirun Dai</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+D">Dylan Zhang</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+J+W">Jiaqi W. Ma</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+H">Hao Peng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.12147v1-abstract-short" style="display: inline;"> Selecting appropriate training data is crucial for effective instruction fine-tuning of large language models (LLMs), which aims to (1) elicit strong capabilities, and (2) achieve balanced performance across a diverse range of tasks. Influence-based methods show promise in achieving (1) by estimating the contribution of each training example to the model's predictions, but often struggle with (2).… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.12147v1-abstract-full').style.display = 'inline'; document.getElementById('2501.12147v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.12147v1-abstract-full" style="display: none;"> Selecting appropriate training data is crucial for effective instruction fine-tuning of large language models (LLMs), which aims to (1) elicit strong capabilities, and (2) achieve balanced performance across a diverse range of tasks. Influence-based methods show promise in achieving (1) by estimating the contribution of each training example to the model's predictions, but often struggle with (2). Our systematic investigation reveals that this underperformance can be attributed to an inherent bias where certain tasks intrinsically have greater influence than others. As a result, data selection is often biased towards these tasks, not only hurting the model's performance on others but also, counterintuitively, harms performance on these high-influence tasks themselves. As a remedy, we propose BIDS, a Balanced and Influential Data Selection algorithm. BIDS first normalizes influence scores of the training data, and then iteratively balances data selection by choosing the training example with the highest influence on the most underrepresented task. Experiments with both Llama-3 and Mistral-v0.3 on seven benchmarks spanning five diverse capabilities show that BIDS consistently outperforms both state-of-the-art influence-based algorithms and other non-influence-based selection frameworks. Surprisingly, training on a 15% subset selected by BIDS can even outperform full-dataset training with a much more balanced performance. Our analysis further highlights the importance of both instance-level normalization and iterative optimization of selected data for balanced learning of diverse capabilities. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.12147v1-abstract-full').style.display = 'none'; document.getElementById('2501.12147v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.2.7 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.10835">arXiv:2501.10835</a> <span> [<a href="https://arxiv.org/pdf/2501.10835">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> </div> </div> <p class="title is-5 mathjax"> Anatomy of a Historic Blackout: Decoding Spatiotemporal Dynamics of Power Outages and Disparities During Hurricane Beryl </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+X">Xiangpeng Li</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+J">Junwei Ma</a>, <a href="/search/cs?searchtype=author&query=Mostafavi%2C+A">Ali Mostafavi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.10835v2-abstract-short" style="display: inline;"> This study investigates the spatial patterns and temporal variations in outage duration, intensity, and restoration/recovery following the 2024 Hurricane Beryl in Houston, Texas. This historic blackout caused widespread power disruptions across the Houston metropolitan area, leaving more than 2 million customers without power over several days, resulting in more than 143 million total customer-out… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.10835v2-abstract-full').style.display = 'inline'; document.getElementById('2501.10835v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.10835v2-abstract-full" style="display: none;"> This study investigates the spatial patterns and temporal variations in outage duration, intensity, and restoration/recovery following the 2024 Hurricane Beryl in Houston, Texas. This historic blackout caused widespread power disruptions across the Houston metropolitan area, leaving more than 2 million customers without power over several days, resulting in more than 143 million total customer-out hours.The findings reveal that areas with higher population density and proximity to the hurricane's path experienced more severe initial impacts. Regions with higher median income showed faster recovery, while lower-income areas exhibited prolonged restoration periods, even with favorable infrastructural conditions, suggesting disparities in restoration speed. The study also highlights how urban development features, such as road density and land elevation, explain spatial disparities in power outage impacts and recovery. This research advances the understanding of power outage dynamics in large metropolitan regions through four key contributions: (1) empirical characterization of outages from a historic hurricane, highlighting infrastructure vulnerabilities in a high-density urban context; (2) comprehensive analysis using multiple metrics to capture spatiotemporal dynamics of outages and restoration; (3) leveraging of high-resolution outage data at fine geographic scales and frequent intervals to quantify and reveal previously masked spatial disparities; and (4) systematic examination of socioeconomic, urban development, and environmental factors in shaping disparities in outage impacts and recovery timelines. These findings provide infrastructure managers, operators, utilities, and decision-makers with crucial empirical insights to quantify power outage impacts, justify resilience investments, and address vulnerability and equity issues in the power infrastructure during hazard events. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.10835v2-abstract-full').style.display = 'none'; document.getElementById('2501.10835v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.09338">arXiv:2501.09338</a> <span> [<a href="https://arxiv.org/pdf/2501.09338">pdf</a>, <a href="https://arxiv.org/format/2501.09338">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Robust UAV Path Planning with Obstacle Avoidance for Emergency Rescue </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mao%2C+J">Junteng Mao</a>, <a href="/search/cs?searchtype=author&query=Jia%2C+Z">Ziye Jia</a>, <a href="/search/cs?searchtype=author&query=Gu%2C+H">Hanzhi Gu</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+C">Chenyu Shi</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+H">Haomin Shi</a>, <a href="/search/cs?searchtype=author&query=He%2C+L">Lijun He</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Q">Qihui Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.09338v1-abstract-short" style="display: inline;"> The unmanned aerial vehicles (UAVs) are efficient tools for diverse tasks such as electronic reconnaissance, agricultural operations and disaster relief. In the complex three-dimensional (3D) environments, the path planning with obstacle avoidance for UAVs is a significant issue for security assurance. In this paper, we construct a comprehensive 3D scenario with obstacles and no-fly zones for dyna… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.09338v1-abstract-full').style.display = 'inline'; document.getElementById('2501.09338v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.09338v1-abstract-full" style="display: none;"> The unmanned aerial vehicles (UAVs) are efficient tools for diverse tasks such as electronic reconnaissance, agricultural operations and disaster relief. In the complex three-dimensional (3D) environments, the path planning with obstacle avoidance for UAVs is a significant issue for security assurance. In this paper, we construct a comprehensive 3D scenario with obstacles and no-fly zones for dynamic UAV trajectory. Moreover, a novel artificial potential field algorithm coupled with simulated annealing (APF-SA) is proposed to tackle the robust path planning problem. APF-SA modifies the attractive and repulsive potential functions and leverages simulated annealing to escape local minimum and converge to globally optimal solutions. Simulation results demonstrate that the effectiveness of APF-SA, enabling efficient autonomous path planning for UAVs with obstacle avoidance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.09338v1-abstract-full').style.display = 'none'; document.getElementById('2501.09338v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.08062">arXiv:2501.08062</a> <span> [<a href="https://arxiv.org/pdf/2501.08062">pdf</a>, <a href="https://arxiv.org/format/2501.08062">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Skeleton and Font Generation Network for Zero-shot Chinese Character Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xue%2C+M">Mobai Xue</a>, <a href="/search/cs?searchtype=author&query=Du%2C+J">Jun Du</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zhenrong Zhang</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+J">Jiefeng Ma</a>, <a href="/search/cs?searchtype=author&query=Chang%2C+Q">Qikai Chang</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+P">Pengfei Hu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jianshu Zhang</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+Y">Yu Hu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.08062v1-abstract-short" style="display: inline;"> Automatic font generation remains a challenging research issue, primarily due to the vast number of Chinese characters, each with unique and intricate structures. Our investigation of previous studies reveals inherent bias capable of causing structural changes in characters. Specifically, when generating a Chinese character similar to, but different from, those in the training samples, the bias is… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.08062v1-abstract-full').style.display = 'inline'; document.getElementById('2501.08062v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.08062v1-abstract-full" style="display: none;"> Automatic font generation remains a challenging research issue, primarily due to the vast number of Chinese characters, each with unique and intricate structures. Our investigation of previous studies reveals inherent bias capable of causing structural changes in characters. Specifically, when generating a Chinese character similar to, but different from, those in the training samples, the bias is prone to either correcting or ignoring these subtle variations. To address this concern, we propose a novel Skeleton and Font Generation Network (SFGN) to achieve a more robust Chinese character font generation. Our approach includes a skeleton builder and font generator. The skeleton builder synthesizes content features using low-resource text input, enabling our technique to realize font generation independently of content image inputs. Unlike previous font generation methods that treat font style as a global embedding, we introduce a font generator to align content and style features on the radical level, which is a brand-new perspective for font generation. Except for common characters, we also conduct experiments on misspelled characters, a substantial portion of which slightly differs from the common ones. Our approach visually demonstrates the efficacy of generated images and outperforms current state-of-the-art font generation methods. Moreover, we believe that misspelled character generation have significant pedagogical implications and verify such supposition through experiments. We used generated misspelled characters as data augmentation in Chinese character error correction tasks, simulating the scenario where students learn handwritten Chinese characters with the help of misspelled characters. The significantly improved performance of error correction tasks demonstrates the effectiveness of our proposed approach and the value of misspelled character generation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.08062v1-abstract-full').style.display = 'none'; document.getElementById('2501.08062v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">36 pages, 10 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.07014">arXiv:2501.07014</a> <span> [<a href="https://arxiv.org/pdf/2501.07014">pdf</a>, <a href="https://arxiv.org/format/2501.07014">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> AlgoRxplorers | Precision in Mutation: Enhancing Drug Design with Advanced Protein Stability Prediction Tools </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Thakrar%2C+K">Karishma Thakrar</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+J">Jiangqin Ma</a>, <a href="/search/cs?searchtype=author&query=Diamond%2C+M">Max Diamond</a>, <a href="/search/cs?searchtype=author&query=Patel%2C+A">Akash Patel</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.07014v3-abstract-short" style="display: inline;"> Predicting the impact of single-point amino acid mutations on protein stability is essential for understanding disease mechanisms and advancing drug development. Protein stability, quantified by changes in Gibbs free energy ($螖螖G$), is influenced by these mutations. However, the scarcity of data and the complexity of model interpretation pose challenges in accurately predicting stability changes.… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.07014v3-abstract-full').style.display = 'inline'; document.getElementById('2501.07014v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.07014v3-abstract-full" style="display: none;"> Predicting the impact of single-point amino acid mutations on protein stability is essential for understanding disease mechanisms and advancing drug development. Protein stability, quantified by changes in Gibbs free energy ($螖螖G$), is influenced by these mutations. However, the scarcity of data and the complexity of model interpretation pose challenges in accurately predicting stability changes. This study proposes the application of deep neural networks, leveraging transfer learning and fusing complementary information from different models, to create a feature-rich representation of the protein stability landscape. We developed four models, with our third model, ThermoMPNN+, demonstrating the best performance in predicting $螖螖G$ values. This approach, which integrates diverse feature sets and embeddings through latent transfusion techniques, aims to refine $螖螖G$ predictions and contribute to a deeper understanding of protein dynamics, potentially leading to advancements in disease research and drug discovery. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.07014v3-abstract-full').style.display = 'none'; document.getElementById('2501.07014v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.06807">arXiv:2501.06807</a> <span> [<a href="https://arxiv.org/pdf/2501.06807">pdf</a>, <a href="https://arxiv.org/format/2501.06807">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> MPCache: MPC-Friendly KV Cache Eviction for Efficient Private Large Language Model Inference </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zeng%2C+W">Wenxuan Zeng</a>, <a href="/search/cs?searchtype=author&query=Dong%2C+Y">Ye Dong</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+J">Jinjin Zhou</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+J">Junming Ma</a>, <a href="/search/cs?searchtype=author&query=Tan%2C+J">Jin Tan</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+R">Runsheng Wang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+M">Meng Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.06807v1-abstract-short" style="display: inline;"> Private large language model (LLM) inference based on secure multi-party computation (MPC) offers cryptographically-secure protection for both user prompt and proprietary model weights. However, it suffers from large latency overhead especially for long input sequences. While key-value (KV) cache eviction algorithms have been proposed to reduce the computation and memory cost for plaintext inferen… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.06807v1-abstract-full').style.display = 'inline'; document.getElementById('2501.06807v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.06807v1-abstract-full" style="display: none;"> Private large language model (LLM) inference based on secure multi-party computation (MPC) offers cryptographically-secure protection for both user prompt and proprietary model weights. However, it suffers from large latency overhead especially for long input sequences. While key-value (KV) cache eviction algorithms have been proposed to reduce the computation and memory cost for plaintext inference, they are not designed for MPC and cannot benefit private inference easily. In this paper, we propose an accurate and MPC-friendly KV cache eviction framework, dubbed MPCache. MPCache is built on the observation that historical tokens in a long sequence may have different effects on the downstream decoding. Hence, MPCache combines a look-once static eviction algorithm to discard unimportant tokens and a query-aware dynamic selection algorithm to further select a small subset of tokens for attention computation. As existing dynamic selection algorithms incur too much latency, we propose a series of optimizations to drastically reduce the KV cache selection overhead, including MPC-friendly similarity approximation, hierarchical KV cache clustering, and cross-layer index sharing strategy. With extensive experiments, we demonstrate that MPCache consistently outperforms prior-art KV cache eviction baselines across different LLM generation tasks and achieves 1.8~2.01x and 3.39~8.37x decoding latency and communication reduction on different sequence lengths, respectively. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.06807v1-abstract-full').style.display = 'none'; document.getElementById('2501.06807v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.06132">arXiv:2501.06132</a> <span> [<a href="https://arxiv.org/pdf/2501.06132">pdf</a>, <a href="https://arxiv.org/format/2501.06132">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> </div> </div> <p class="title is-5 mathjax"> CoDriveVLM: VLM-Enhanced Urban Cooperative Dispatching and Motion Planning for Future Autonomous Mobility on Demand Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+H">Haichao Liu</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+R">Ruoyu Yao</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+W">Wenru Liu</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Z">Zhenmin Huang</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+S">Shaojie Shen</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+J">Jun Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.06132v1-abstract-short" style="display: inline;"> The increasing demand for flexible and efficient urban transportation solutions has spotlighted the limitations of traditional Demand Responsive Transport (DRT) systems, particularly in accommodating diverse passenger needs and dynamic urban environments. Autonomous Mobility-on-Demand (AMoD) systems have emerged as a promising alternative, leveraging connected and autonomous vehicles (CAVs) to pro… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.06132v1-abstract-full').style.display = 'inline'; document.getElementById('2501.06132v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.06132v1-abstract-full" style="display: none;"> The increasing demand for flexible and efficient urban transportation solutions has spotlighted the limitations of traditional Demand Responsive Transport (DRT) systems, particularly in accommodating diverse passenger needs and dynamic urban environments. Autonomous Mobility-on-Demand (AMoD) systems have emerged as a promising alternative, leveraging connected and autonomous vehicles (CAVs) to provide responsive and adaptable services. However, existing methods primarily focus on either vehicle scheduling or path planning, which often simplify complex urban layouts and neglect the necessity for simultaneous coordination and mutual avoidance among CAVs. This oversimplification poses significant challenges to the deployment of AMoD systems in real-world scenarios. To address these gaps, we propose CoDriveVLM, a novel framework that integrates high-fidelity simultaneous dispatching and cooperative motion planning for future AMoD systems. Our method harnesses Vision-Language Models (VLMs) to enhance multi-modality information processing, and this enables comprehensive dispatching and collision risk evaluation. The VLM-enhanced CAV dispatching coordinator is introduced to effectively manage complex and unforeseen AMoD conditions, thus supporting efficient scheduling decision-making. Furthermore, we propose a scalable decentralized cooperative motion planning method via consensus alternating direction method of multipliers (ADMM) focusing on collision risk evaluation and decentralized trajectory optimization. Simulation results demonstrate the feasibility and robustness of CoDriveVLM in various traffic conditions, showcasing its potential to significantly improve the fidelity and effectiveness of AMoD systems in future urban transportation networks. The code is available at https://github.com/henryhcliu/CoDriveVLM.git. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.06132v1-abstract-full').style.display = 'none'; document.getElementById('2501.06132v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.05057">arXiv:2501.05057</a> <span> [<a href="https://arxiv.org/pdf/2501.05057">pdf</a>, <a href="https://arxiv.org/format/2501.05057">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> LearningFlow: Automated Policy Learning Workflow for Urban Driving with Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Peng%2C+Z">Zengqi Peng</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yubin Wang</a>, <a href="/search/cs?searchtype=author&query=Han%2C+X">Xu Han</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+L">Lei Zheng</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+J">Jun Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.05057v1-abstract-short" style="display: inline;"> Recent advancements in reinforcement learning (RL) demonstrate the significant potential in autonomous driving. Despite this promise, challenges such as the manual design of reward functions and low sample efficiency in complex environments continue to impede the development of safe and effective driving policies. To tackle these issues, we introduce LearningFlow, an innovative automated policy le… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.05057v1-abstract-full').style.display = 'inline'; document.getElementById('2501.05057v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.05057v1-abstract-full" style="display: none;"> Recent advancements in reinforcement learning (RL) demonstrate the significant potential in autonomous driving. Despite this promise, challenges such as the manual design of reward functions and low sample efficiency in complex environments continue to impede the development of safe and effective driving policies. To tackle these issues, we introduce LearningFlow, an innovative automated policy learning workflow tailored to urban driving. This framework leverages the collaboration of multiple large language model (LLM) agents throughout the RL training process. LearningFlow includes a curriculum sequence generation process and a reward generation process, which work in tandem to guide the RL policy by generating tailored training curricula and reward functions. Particularly, each process is supported by an analysis agent that evaluates training progress and provides critical insights to the generation agent. Through the collaborative efforts of these LLM agents, LearningFlow automates policy learning across a series of complex driving tasks, and it significantly reduces the reliance on manual reward function design while enhancing sample efficiency. Comprehensive experiments are conducted in the high-fidelity CARLA simulator, along with comparisons with other existing methods, to demonstrate the efficacy of our proposed approach. The results demonstrate that LearningFlow excels in generating rewards and curricula. It also achieves superior performance and robust generalization across various driving tasks, as well as commendable adaptation to different RL algorithms. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.05057v1-abstract-full').style.display = 'none'; document.getElementById('2501.05057v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.04942">arXiv:2501.04942</a> <span> [<a href="https://arxiv.org/pdf/2501.04942">pdf</a>, <a href="https://arxiv.org/format/2501.04942">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Vision Graph Non-Contrastive Learning for Audio Deepfake Detection with Limited Labels </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Febrinanto%2C+F+G">Falih Gozi Febrinanto</a>, <a href="/search/cs?searchtype=author&query=Moore%2C+K">Kristen Moore</a>, <a href="/search/cs?searchtype=author&query=Thapa%2C+C">Chandra Thapa</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+J">Jiangang Ma</a>, <a href="/search/cs?searchtype=author&query=Saikrishna%2C+V">Vidya Saikrishna</a>, <a href="/search/cs?searchtype=author&query=Xia%2C+F">Feng Xia</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.04942v1-abstract-short" style="display: inline;"> Recent advancements in audio deepfake detection have leveraged graph neural networks (GNNs) to model frequency and temporal interdependencies in audio data, effectively identifying deepfake artifacts. However, the reliance of GNN-based methods on substantial labeled data for graph construction and robust performance limits their applicability in scenarios with limited labeled data. Although vast a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.04942v1-abstract-full').style.display = 'inline'; document.getElementById('2501.04942v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.04942v1-abstract-full" style="display: none;"> Recent advancements in audio deepfake detection have leveraged graph neural networks (GNNs) to model frequency and temporal interdependencies in audio data, effectively identifying deepfake artifacts. However, the reliance of GNN-based methods on substantial labeled data for graph construction and robust performance limits their applicability in scenarios with limited labeled data. Although vast amounts of audio data exist, the process of labeling samples as genuine or fake remains labor-intensive and costly. To address this challenge, we propose SIGNL (Spatio-temporal vIsion Graph Non-contrastive Learning), a novel framework that maintains high GNN performance in low-label settings. SIGNL constructs spatio-temporal graphs by representing patches from the audio's visual spectrogram as nodes. These graph structures are modeled using vision graph convolutional (GC) encoders pre-trained through graph non-contrastive learning, a label-free that maximizes the similarity between positive pairs. The pre-trained encoders are then fine-tuned for audio deepfake detection, reducing reliance on labeled data. Experiments demonstrate that SIGNL outperforms state-of-the-art baselines across multiple audio deepfake detection datasets, achieving the lowest Equal Error Rate (EER) with as little as 5% labeled data. Additionally, SIGNL exhibits strong cross-domain generalization, achieving the lowest EER in evaluations involving diverse attack types and languages in the In-The-Wild dataset. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.04942v1-abstract-full').style.display = 'none'; document.getElementById('2501.04942v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.03670">arXiv:2501.03670</a> <span> [<a href="https://arxiv.org/pdf/2501.03670">pdf</a>, <a href="https://arxiv.org/format/2501.03670">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> A Diversity-Enhanced Knowledge Distillation Model for Practical Math Word Problem Solving </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yi Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+G">Guangyou Zhou</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+Z">Zhiwen Xie</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+J">Jinjin Ma</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+J+X">Jimmy Xiangji Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.03670v1-abstract-short" style="display: inline;"> Math Word Problem (MWP) solving is a critical task in natural language processing, has garnered significant research interest in recent years. Various recent studies heavily rely on Seq2Seq models and their extensions (e.g., Seq2Tree and Graph2Tree) to generate mathematical equations. While effective, these models struggle to generate diverse but counterpart solution equations, limiting their gene… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.03670v1-abstract-full').style.display = 'inline'; document.getElementById('2501.03670v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.03670v1-abstract-full" style="display: none;"> Math Word Problem (MWP) solving is a critical task in natural language processing, has garnered significant research interest in recent years. Various recent studies heavily rely on Seq2Seq models and their extensions (e.g., Seq2Tree and Graph2Tree) to generate mathematical equations. While effective, these models struggle to generate diverse but counterpart solution equations, limiting their generalization across various math problem scenarios. In this paper, we introduce a novel Diversity-enhanced Knowledge Distillation (DivKD) model for practical MWP solving. Our approach proposes an adaptive diversity distillation method, in which a student model learns diverse equations by selectively transferring high-quality knowledge from a teacher model. Additionally, we design a diversity prior-enhanced student model to better capture the diversity distribution of equations by incorporating a conditional variational auto-encoder. Extensive experiments on {four} MWP benchmark datasets demonstrate that our approach achieves higher answer accuracy than strong baselines while maintaining high efficiency for practical applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.03670v1-abstract-full').style.display = 'none'; document.getElementById('2501.03670v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.03596">arXiv:2501.03596</a> <span> [<a href="https://arxiv.org/pdf/2501.03596">pdf</a>, <a href="https://arxiv.org/format/2501.03596">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Exploring EEG and Eye Movement Fusion for Multi-Class Target RSVP-BCI </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+X">Xujin Li</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+W">Wei Wei</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+K">Kun Zhao</a>, <a href="/search/cs?searchtype=author&query=Mao%2C+J">Jiayu Mao</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+Y">Yizhuo Lu</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+S">Shuang Qiu</a>, <a href="/search/cs?searchtype=author&query=He%2C+H">Huiguang He</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.03596v1-abstract-short" style="display: inline;"> Rapid Serial Visual Presentation (RSVP)-based Brain-Computer Interfaces (BCIs) facilitate high-throughput target image detection by identifying event-related potentials (ERPs) evoked in EEG signals. The RSVP-BCI systems effectively detect single-class targets within a stream of images but have limited applicability in scenarios that require detecting multiple target categories. Multi-class RSVP-BC… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.03596v1-abstract-full').style.display = 'inline'; document.getElementById('2501.03596v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.03596v1-abstract-full" style="display: none;"> Rapid Serial Visual Presentation (RSVP)-based Brain-Computer Interfaces (BCIs) facilitate high-throughput target image detection by identifying event-related potentials (ERPs) evoked in EEG signals. The RSVP-BCI systems effectively detect single-class targets within a stream of images but have limited applicability in scenarios that require detecting multiple target categories. Multi-class RSVP-BCI systems address this limitation by simultaneously identifying the presence of a target and distinguishing its category. However, existing multi-class RSVP decoding algorithms predominantly rely on single-modality EEG decoding, which restricts their performance improvement due to the high similarity between ERPs evoked by different target categories. In this work, we introduce eye movement (EM) modality into multi-class RSVP decoding and explore EEG and EM fusion to enhance decoding performance. First, we design three independent multi-class target RSVP tasks and build an open-source dataset comprising EEG and EM signals from 43 subjects. Then, we propose the Multi-class Target RSVP EEG and EM fusion Network (MTREE-Net) to enhance multi-class RSVP decoding. Specifically, a dual-complementary module is proposed to strengthen the differentiation of uni-modal features across categories. To improve multi-modal fusion performance, we adopt a dynamic reweighting fusion strategy guided by theoretically derived modality contribution ratios. Furthermore, we reduce the misclassification of non-target samples through knowledge transfer between two hierarchical classifiers. Extensive experiments demonstrate the feasibility of integrating EM signals into multi-class RSVP decoding and highlight the superior performance of MTREE-Net compared to existing RSVP decoding methods. The proposed MTREE-Net and open-source dataset provide a promising framework for developing practical multi-class RSVP-BCI systems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.03596v1-abstract-full').style.display = 'none'; document.getElementById('2501.03596v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages, 9 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.5.0 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.03272">arXiv:2501.03272</a> <span> [<a href="https://arxiv.org/pdf/2501.03272">pdf</a>, <a href="https://arxiv.org/format/2501.03272">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Backdoor Token Unlearning: Exposing and Defending Backdoors in Pretrained Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jiang%2C+P">Peihai Jiang</a>, <a href="/search/cs?searchtype=author&query=Lyu%2C+X">Xixiang Lyu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yige Li</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+J">Jing Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.03272v1-abstract-short" style="display: inline;"> Supervised fine-tuning has become the predominant method for adapting large pretrained models to downstream tasks. However, recent studies have revealed that these models are vulnerable to backdoor attacks, where even a small number of malicious samples can successfully embed backdoor triggers into the model. While most existing defense methods focus on post-training backdoor defense, efficiently… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.03272v1-abstract-full').style.display = 'inline'; document.getElementById('2501.03272v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.03272v1-abstract-full" style="display: none;"> Supervised fine-tuning has become the predominant method for adapting large pretrained models to downstream tasks. However, recent studies have revealed that these models are vulnerable to backdoor attacks, where even a small number of malicious samples can successfully embed backdoor triggers into the model. While most existing defense methods focus on post-training backdoor defense, efficiently defending against backdoor attacks during training phase remains largely unexplored. To address this gap, we propose a novel defense method called Backdoor Token Unlearning (BTU), which proactively detects and neutralizes trigger tokens during the training stage. Our work is based on two key findings: 1) backdoor learning causes distinctive differences between backdoor token parameters and clean token parameters in word embedding layers, and 2) the success of backdoor attacks heavily depends on backdoor token parameters. The BTU defense leverages these properties to identify aberrant embedding parameters and subsequently removes backdoor behaviors using a fine-grained unlearning technique. Extensive evaluations across three datasets and four types of backdoor attacks demonstrate that BTU effectively defends against these threats while preserving the model's performance on primary tasks. Our code is available at https://github.com/XDJPH/BTU. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.03272v1-abstract-full').style.display = 'none'; document.getElementById('2501.03272v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">AAAI 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.02979">arXiv:2501.02979</a> <span> [<a href="https://arxiv.org/pdf/2501.02979">pdf</a>, <a href="https://arxiv.org/format/2501.02979">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Registering Source Tokens to Target Language Spaces in Multilingual Neural Machine Translation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Qu%2C+Z">Zhi Qu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yiran Wang</a>, <a href="/search/cs?searchtype=author&query=Mao%2C+J">Jiannan Mao</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+C">Chenchen Ding</a>, <a href="/search/cs?searchtype=author&query=Tanaka%2C+H">Hideki Tanaka</a>, <a href="/search/cs?searchtype=author&query=Utiyama%2C+M">Masao Utiyama</a>, <a href="/search/cs?searchtype=author&query=Watanabe%2C+T">Taro Watanabe</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.02979v1-abstract-short" style="display: inline;"> The multilingual neural machine translation (MNMT) enables arbitrary translations across multiple languages by training a model with limited parameters using parallel data only. However, the performance of such MNMT models still lags behind that of large language models (LLMs), limiting their practicality. In this work, we address this limitation by introducing registering to achieve the new state… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.02979v1-abstract-full').style.display = 'inline'; document.getElementById('2501.02979v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.02979v1-abstract-full" style="display: none;"> The multilingual neural machine translation (MNMT) enables arbitrary translations across multiple languages by training a model with limited parameters using parallel data only. However, the performance of such MNMT models still lags behind that of large language models (LLMs), limiting their practicality. In this work, we address this limitation by introducing registering to achieve the new state-of-the-art of decoder-only MNMT models. Specifically, we insert a set of artificial tokens specifying the target language, called registers, into the input sequence between the source and target tokens. By modifying the attention mask, the target token generation only pays attention to the activation of registers, representing the source tokens in the target language space. Experiments on EC-40, a large-scale benchmark, show that our method outperforms related methods driven by optimizing multilingual representations. We further scale up and collect 9.3 billion sentence pairs across 24 languages from public datasets to pre-train two models, namely MITRE (multilingual translation with registers). One of them, MITRE-913M, outperforms NLLB-3.3B, achieves comparable performance with commercial LLMs, and shows strong adaptability in fine-tuning. Finally, we open-source our models to facilitate further research and development in MNMT: https://github.com/zhiqu22/mitre. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.02979v1-abstract-full').style.display = 'none'; document.getElementById('2501.02979v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.02870">arXiv:2501.02870</a> <span> [<a href="https://arxiv.org/pdf/2501.02870">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/ICN64251.2024.10865971">10.1109/ICN64251.2024.10865971 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Spectrum Sharing in 6G Space-Ground Integrated Networks: A Ground Protection Zone-Based Design </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shang%2C+B">Bodong Shang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xiangyu Li</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zheng Wang</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+J">Junchao Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.02870v1-abstract-short" style="display: inline;"> Space-ground integrated network (SGIN) has been envisioned as a competitive solution for large scale and wide coverage of future wireless networks. By integrating both the non-terrestrial network (NTN) and the terrestrial network (TN), SGIN can provide high speed and omnipresent wireless network access for the users using the predefined licensed spectrums. Considering the scarcity of the spectrum… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.02870v1-abstract-full').style.display = 'inline'; document.getElementById('2501.02870v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.02870v1-abstract-full" style="display: none;"> Space-ground integrated network (SGIN) has been envisioned as a competitive solution for large scale and wide coverage of future wireless networks. By integrating both the non-terrestrial network (NTN) and the terrestrial network (TN), SGIN can provide high speed and omnipresent wireless network access for the users using the predefined licensed spectrums. Considering the scarcity of the spectrum resource and the low spectrum efficiency of the SGIN, we enable the NTN and TN to share the spectrum to improve overall system performance, i.e., weighted-sum area data rate (WS-ADR). However, mutual interference between NTN and TN is often inevitable and thus causes SGIN performance degradation. In this work, we consider a ground protection zone for the TN base stations, in which the NTN users are only allowed to use the NTN reserved spectrum to mitigate the NTN and TN mutual interference. We analytically derive the coverage probability and area data rate (ADR) of the typical users and study the performance under various protection zone sizes and spectrum allocation parameter settings. Simulation and numerical results demonstrate that the WS-ADR could be maximized by selecting the appropriate radius of protection zone and bandwidth allocation factor in the SGIN. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.02870v1-abstract-full').style.display = 'none'; document.getElementById('2501.02870v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.02815">arXiv:2501.02815</a> <span> [<a href="https://arxiv.org/pdf/2501.02815">pdf</a>, <a href="https://arxiv.org/format/2501.02815">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Local Reactive Control for Mobile Manipulators with Whole-Body Safety in Complex Environments </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zheng%2C+C">Chunxin Zheng</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yulin Li</a>, <a href="/search/cs?searchtype=author&query=Song%2C+Z">Zhiyuan Song</a>, <a href="/search/cs?searchtype=author&query=Bi%2C+Z">Zhihai Bi</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+J">Jinni Zhou</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+B">Boyu Zhou</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+J">Jun Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.02815v1-abstract-short" style="display: inline;"> Mobile manipulators typically encounter significant challenges in navigating narrow, cluttered environments due to their high-dimensional state spaces and complex kinematics. While reactive methods excel in dynamic settings, they struggle to efficiently incorporate complex, coupled constraints across the entire state space. In this work, we present a novel local reactive controller that reformulat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.02815v1-abstract-full').style.display = 'inline'; document.getElementById('2501.02815v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.02815v1-abstract-full" style="display: none;"> Mobile manipulators typically encounter significant challenges in navigating narrow, cluttered environments due to their high-dimensional state spaces and complex kinematics. While reactive methods excel in dynamic settings, they struggle to efficiently incorporate complex, coupled constraints across the entire state space. In this work, we present a novel local reactive controller that reformulates the time-domain single-step problem into a multi-step optimization problem in the spatial domain, leveraging the propagation of a serial kinematic chain. This transformation facilitates the formulation of customized, decoupled link-specific constraints, which is further solved efficiently with augmented Lagrangian differential dynamic programming (AL-DDP). Our approach naturally absorbs spatial kinematic propagation in the forward pass and processes all link-specific constraints simultaneously during the backward pass, enhancing both constraint management and computational efficiency. Notably, in this framework, we formulate collision avoidance constraints for each link using accurate geometric models with extracted free regions, and this improves the maneuverability of the mobile manipulator in narrow, cluttered spaces. Experimental results showcase significant improvements in safety, efficiency, and task completion rates. These findings underscore the robustness of the proposed method, particularly in narrow, cluttered environments where conventional approaches could falter. The open-source project can be found at https://github.com/Chunx1nZHENG/MM-with-Whole-Body-Safety-Release.git. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.02815v1-abstract-full').style.display = 'none'; document.getElementById('2501.02815v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.02530">arXiv:2501.02530</a> <span> [<a href="https://arxiv.org/pdf/2501.02530">pdf</a>, <a href="https://arxiv.org/format/2501.02530">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> UDMC: Unified Decision-Making and Control Framework for Urban Autonomous Driving with Motion Prediction of Traffic Participants </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+H">Haichao Liu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Kai Chen</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yulin Li</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Z">Zhenmin Huang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+M">Ming Liu</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+J">Jun Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.02530v1-abstract-short" style="display: inline;"> Current autonomous driving systems often struggle to balance decision-making and motion control while ensuring safety and traffic rule compliance, especially in complex urban environments. Existing methods may fall short due to separate handling of these functionalities, leading to inefficiencies and safety compromises. To address these challenges, we introduce UDMC, an interpretable and unified L… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.02530v1-abstract-full').style.display = 'inline'; document.getElementById('2501.02530v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.02530v1-abstract-full" style="display: none;"> Current autonomous driving systems often struggle to balance decision-making and motion control while ensuring safety and traffic rule compliance, especially in complex urban environments. Existing methods may fall short due to separate handling of these functionalities, leading to inefficiencies and safety compromises. To address these challenges, we introduce UDMC, an interpretable and unified Level 4 autonomous driving framework. UDMC integrates decision-making and motion control into a single optimal control problem (OCP), considering the dynamic interactions with surrounding vehicles, pedestrians, road lanes, and traffic signals. By employing innovative potential functions to model traffic participants and regulations, and incorporating a specialized motion prediction module, our framework enhances on-road safety and rule adherence. The integrated design allows for real-time execution of flexible maneuvers suited to diverse driving scenarios. High-fidelity simulations conducted in CARLA exemplify the framework's computational efficiency, robustness, and safety, resulting in superior driving performance when compared against various baseline models. Our open-source project is available at https://github.com/henryhcliu/udmc_carla.git. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.02530v1-abstract-full').style.display = 'none'; document.getElementById('2501.02530v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.01892">arXiv:2501.01892</a> <span> [<a href="https://arxiv.org/pdf/2501.01892">pdf</a>, <a href="https://arxiv.org/format/2501.01892">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> QuArch: A Question-Answering Dataset for AI Agents in Computer Architecture </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Prakash%2C+S">Shvetank Prakash</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+A">Andrew Cheng</a>, <a href="/search/cs?searchtype=author&query=Yik%2C+J">Jason Yik</a>, <a href="/search/cs?searchtype=author&query=Tschand%2C+A">Arya Tschand</a>, <a href="/search/cs?searchtype=author&query=Ghosal%2C+R">Radhika Ghosal</a>, <a href="/search/cs?searchtype=author&query=Uchendu%2C+I">Ikechukwu Uchendu</a>, <a href="/search/cs?searchtype=author&query=Quaye%2C+J">Jessica Quaye</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+J">Jeffrey Ma</a>, <a href="/search/cs?searchtype=author&query=Grampurohit%2C+S">Shreyas Grampurohit</a>, <a href="/search/cs?searchtype=author&query=Giannuzzi%2C+S">Sofia Giannuzzi</a>, <a href="/search/cs?searchtype=author&query=Balyan%2C+A">Arnav Balyan</a>, <a href="/search/cs?searchtype=author&query=Amin%2C+F">Fin Amin</a>, <a href="/search/cs?searchtype=author&query=Pipersenia%2C+A">Aadya Pipersenia</a>, <a href="/search/cs?searchtype=author&query=Choudhary%2C+Y">Yash Choudhary</a>, <a href="/search/cs?searchtype=author&query=Nayak%2C+A">Ankita Nayak</a>, <a href="/search/cs?searchtype=author&query=Yazdanbakhsh%2C+A">Amir Yazdanbakhsh</a>, <a href="/search/cs?searchtype=author&query=Reddi%2C+V+J">Vijay Janapa Reddi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.01892v2-abstract-short" style="display: inline;"> We introduce QuArch, a dataset of 1500 human-validated question-answer pairs designed to evaluate and enhance language models' understanding of computer architecture. The dataset covers areas including processor design, memory systems, and performance optimization. Our analysis highlights a significant performance gap: the best closed-source model achieves 84% accuracy, while the top small open-so… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.01892v2-abstract-full').style.display = 'inline'; document.getElementById('2501.01892v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.01892v2-abstract-full" style="display: none;"> We introduce QuArch, a dataset of 1500 human-validated question-answer pairs designed to evaluate and enhance language models' understanding of computer architecture. The dataset covers areas including processor design, memory systems, and performance optimization. Our analysis highlights a significant performance gap: the best closed-source model achieves 84% accuracy, while the top small open-source model reaches 72%. We observe notable struggles in memory systems, interconnection networks, and benchmarking. Fine-tuning with QuArch improves small model accuracy by up to 8%, establishing a foundation for advancing AI-driven computer architecture research. The dataset and leaderboard are at https://harvard-edge.github.io/QuArch/. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.01892v2-abstract-full').style.display = 'none'; document.getElementById('2501.01892v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.01456">arXiv:2501.01456</a> <span> [<a href="https://arxiv.org/pdf/2501.01456">pdf</a>, <a href="https://arxiv.org/format/2501.01456">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> SS-CTML: Self-Supervised Cross-Task Mutual Learning for CT Image Reconstruction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+G">Gaofeng Chen</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yaoduo Zhang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+L">Li Huang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+P">Pengfei Wang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+W">Wenyu Zhang</a>, <a href="/search/cs?searchtype=author&query=Zeng%2C+D">Dong Zeng</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+J">Jianhua Ma</a>, <a href="/search/cs?searchtype=author&query=He%2C+J">Ji He</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.01456v1-abstract-short" style="display: inline;"> Supervised deep-learning (SDL) techniques with paired training datasets have been widely studied for X-ray computed tomography (CT) image reconstruction. However, due to the difficulties of obtaining paired training datasets in clinical routine, the SDL methods are still away from common uses in clinical practices. In recent years, self-supervised deep-learning (SSDL) techniques have shown great p… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.01456v1-abstract-full').style.display = 'inline'; document.getElementById('2501.01456v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.01456v1-abstract-full" style="display: none;"> Supervised deep-learning (SDL) techniques with paired training datasets have been widely studied for X-ray computed tomography (CT) image reconstruction. However, due to the difficulties of obtaining paired training datasets in clinical routine, the SDL methods are still away from common uses in clinical practices. In recent years, self-supervised deep-learning (SSDL) techniques have shown great potential for the studies of CT image reconstruction. In this work, we propose a self-supervised cross-task mutual learning (SS-CTML) framework for CT image reconstruction. Specifically, a sparse-view scanned and a limited-view scanned sinogram data are first extracted from a full-view scanned sinogram data, which results in three individual reconstruction tasks, i.e., the full-view CT (FVCT) reconstruction, the sparse-view CT (SVCT) reconstruction, and limited-view CT (LVCT) reconstruction. Then, three neural networks are constructed for the three reconstruction tasks. Considering that the ultimate goals of the three tasks are all to reconstruct high-quality CT images, we therefore construct a set of cross-task mutual learning objectives for the three tasks, in which way, the three neural networks can be self-supervised optimized by learning from each other. Clinical datasets are adopted to evaluate the effectiveness of the proposed framework. Experimental results demonstrate that the SS-CTML framework can obtain promising CT image reconstruction performance in terms of both quantitative and qualitative measurements. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.01456v1-abstract-full').style.display = 'none'; document.getElementById('2501.01456v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.00601">arXiv:2501.00601</a> <span> [<a href="https://arxiv.org/pdf/2501.00601">pdf</a>, <a href="https://arxiv.org/format/2501.00601">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> </div> </div> <p class="title is-5 mathjax"> DreamDrive: Generative 4D Scene Modeling from Street View Images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mao%2C+J">Jiageng Mao</a>, <a href="/search/cs?searchtype=author&query=Li%2C+B">Boyi Li</a>, <a href="/search/cs?searchtype=author&query=Ivanovic%2C+B">Boris Ivanovic</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yuxiao Chen</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yan Wang</a>, <a href="/search/cs?searchtype=author&query=You%2C+Y">Yurong You</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+C">Chaowei Xiao</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+D">Danfei Xu</a>, <a href="/search/cs?searchtype=author&query=Pavone%2C+M">Marco Pavone</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yue Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.00601v2-abstract-short" style="display: inline;"> Synthesizing photo-realistic visual observations from an ego vehicle's driving trajectory is a critical step towards scalable training of self-driving models. Reconstruction-based methods create 3D scenes from driving logs and synthesize geometry-consistent driving videos through neural rendering, but their dependence on costly object annotations limits their ability to generalize to in-the-wild d… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.00601v2-abstract-full').style.display = 'inline'; document.getElementById('2501.00601v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.00601v2-abstract-full" style="display: none;"> Synthesizing photo-realistic visual observations from an ego vehicle's driving trajectory is a critical step towards scalable training of self-driving models. Reconstruction-based methods create 3D scenes from driving logs and synthesize geometry-consistent driving videos through neural rendering, but their dependence on costly object annotations limits their ability to generalize to in-the-wild driving scenarios. On the other hand, generative models can synthesize action-conditioned driving videos in a more generalizable way but often struggle with maintaining 3D visual consistency. In this paper, we present DreamDrive, a 4D spatial-temporal scene generation approach that combines the merits of generation and reconstruction, to synthesize generalizable 4D driving scenes and dynamic driving videos with 3D consistency. Specifically, we leverage the generative power of video diffusion models to synthesize a sequence of visual references and further elevate them to 4D with a novel hybrid Gaussian representation. Given a driving trajectory, we then render 3D-consistent driving videos via Gaussian splatting. The use of generative priors allows our method to produce high-quality 4D scenes from in-the-wild driving data, while neural rendering ensures 3D-consistent video generation from the 4D scenes. Extensive experiments on nuScenes and street view images demonstrate that DreamDrive can generate controllable and generalizable 4D driving scenes, synthesize novel views of driving videos with high fidelity and 3D consistency, decompose static and dynamic elements in a self-supervised manner, and enhance perception and planning tasks for autonomous driving. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.00601v2-abstract-full').style.display = 'none'; document.getElementById('2501.00601v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 31 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Project page: https://pointscoder.github.io/DreamDrive/</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Ma%2C+J&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Ma%2C+J&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Ma%2C+J&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Ma%2C+J&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Ma%2C+J&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Ma%2C+J&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">…</span></li> </ul> </nav> <div class="is-hidden-tablet">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary">  <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div>   <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div>  </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

CINXE.COM

Search | arXiv e-print repository