Search | arXiv e-print repository

<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;50 of 368 results for author: <span class="mathjax">Ye, X</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&amp;query=Ye%2C+X">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Ye, X"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Ye%2C+X&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Ye, X"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Ye%2C+X&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Ye%2C+X&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Ye%2C+X&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Ye%2C+X&amp;start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Ye%2C+X&amp;start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Ye%2C+X&amp;start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">&hellip;</span></li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.13658">arXiv:2502.13658</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.13658">pdf</a>, <a href="https://arxiv.org/format/2502.13658">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> What Skills Do Cyber Security Professionals Need? </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Ullah%2C+F">Faheem Ullah</a>, <a href="/search/cs?searchtype=author&amp;query=Ye%2C+X">Xiaohan Ye</a>, <a href="/search/cs?searchtype=author&amp;query=Fatima%2C+U">Uswa Fatima</a>, <a href="/search/cs?searchtype=author&amp;query=Akhtar%2C+Z">Zahid Akhtar</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+Y">Yuxi Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Ahmad%2C+H">Hussain Ahmad</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.13658v2-abstract-short" style="display: inline;"> Purpose: The increasing number of cyber-attacks has elevated the importance of cybersecurity for organizations. This has also increased the demand for professionals with the necessary skills to protect these organizations. As a result, many individuals are looking to enter the field of cybersecurity. 
However, there is a lack of clear understanding of the skills required for a successful career in&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13658v2-abstract-full').style.display = 'inline'; document.getElementById('2502.13658v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.13658v2-abstract-full" style="display: none;"> Purpose: The increasing number of cyber-attacks has elevated the importance of cybersecurity for organizations. This has also increased the demand for professionals with the necessary skills to protect these organizations. As a result, many individuals are looking to enter the field of cybersecurity. However, there is a lack of clear understanding of the skills required for a successful career in this field. In this paper, we identify the skills required for cybersecurity professionals. We also determine how the demand for cyber skills relates to various cyber roles such as security analyst and security architect. Furthermore, we identify the programming languages that are important for cybersecurity professionals. Design/Methodology: For this study, we have collected and analyzed data from 12,161 job ads and 49,002 Stack Overflow posts. By examining this, we identified patterns and trends related to skill requirements, role-specific demands, and programming languages in cybersecurity. Findings: Our results reveal that (i) communication skills and project management skills are the most important soft skills, (ii) as compared to soft skills, the demand for technical skills varies more across various cyber roles, and (iii) Java is the most commonly used programming language. Originality: Our findings serve as a guideline for individuals aiming to get into the field of cybersecurity. Moreover, our findings are useful in terms of informing educational institutes to teach the correct set of skills to students doing degrees in cybersecurity. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13658v2-abstract-full').style.display = 'none'; document.getElementById('2502.13658v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12962">arXiv:2502.12962</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.12962">pdf</a>, <a href="https://arxiv.org/format/2502.12962">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Infinite Retrieval: Attention Enhanced LLMs in Long-Context Processing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Ye%2C+X">Xiaoju Ye</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Z">Zhichun Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+J">Jingyuan Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.12962v1-abstract-short" style="display: inline;"> Limited by the context window size of Large Language Models(LLMs), handling various tasks with input tokens exceeding the upper limit has been challenging, whether it is a simple direct retrieval task or a complex multi-hop reasoning task. Although various methods have been proposed to enhance the long-context processing capabilities of LLMs, they either incur substantial post-training costs, or r&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12962v1-abstract-full').style.display = 'inline'; document.getElementById('2502.12962v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.12962v1-abstract-full" style="display: none;"> Limited by the context window size of Large Language Models(LLMs), handling various tasks with input tokens exceeding the upper limit has been challenging, whether it is a simple direct retrieval task or a complex multi-hop reasoning task. Although various methods have been proposed to enhance the long-context processing capabilities of LLMs, they either incur substantial post-training costs, or require additional tool modules(e.g.,RAG), or have not shown significant improvement in realistic tasks. Our work observes the correlation between the attention distribution and generated answers across each layer, and establishes the attention allocation aligns with retrieval-augmented capabilities through experiments. Drawing on the above insights, we propose a novel method InfiniRetri that leverages the LLMs&#39;s own attention information to enable accurate retrieval across inputs of infinitely length. Our evaluations indicate that InfiniRetri achieves 100% accuracy in the Needle-In-a-Haystack(NIH) test over 1M tokens using a 0.5B parameter model, surpassing other method or larger models and setting a new state-of-the-art(SOTA). Moreover, our method achieves significant performance improvements on real-world benchmarks, with a maximum 288% improvement. In addition, InfiniRetri can be applied to any Transformer-based LLMs without additional training and substantially reduces inference latency and compute overhead in long texts. In summary, our comprehensive studies show InfiniRetri&#39;s potential for practical applications and creates a paradigm for retrievaling information using LLMs own capabilities under infinite-length tokens. Code will be released in link. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12962v1-abstract-full').style.display = 'none'; document.getElementById('2502.12962v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">21 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11883">arXiv:2502.11883</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.11883">pdf</a>, <a href="https://arxiv.org/format/2502.11883">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> FairDiverse: A Comprehensive Toolkit for Fair and Diverse Information Retrieval Algorithms </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Xu%2C+C">Chen Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Deng%2C+Z">Zhirui Deng</a>, <a href="/search/cs?searchtype=author&amp;query=Rus%2C+C">Clara Rus</a>, <a href="/search/cs?searchtype=author&amp;query=Ye%2C+X">Xiaopeng Ye</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Y">Yuanna Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+J">Jun Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Dou%2C+Z">Zhicheng Dou</a>, <a href="/search/cs?searchtype=author&amp;query=Wen%2C+J">Ji-Rong Wen</a>, <a href="/search/cs?searchtype=author&amp;query=de+Rijke%2C+M">Maarten de Rijke</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.11883v1-abstract-short" style="display: inline;"> In modern information retrieval (IR). achieving more than just accuracy is essential to sustaining a healthy ecosystem, especially when addressing fairness and diversity considerations. To meet these needs, various datasets, algorithms, and evaluation frameworks have been introduced. However, these algorithms are often tested across diverse metrics, datasets, and experimental setups, leading to in&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11883v1-abstract-full').style.display = 'inline'; document.getElementById('2502.11883v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11883v1-abstract-full" style="display: none;"> In modern information retrieval (IR). achieving more than just accuracy is essential to sustaining a healthy ecosystem, especially when addressing fairness and diversity considerations. To meet these needs, various datasets, algorithms, and evaluation frameworks have been introduced. However, these algorithms are often tested across diverse metrics, datasets, and experimental setups, leading to inconsistencies and difficulties in direct comparisons. 

4. arXiv:2502.09657 [pdf] cs.CV (Computer Vision and Pattern Recognition)
   Integrating Spatiotemporal Vision Transformer into Digital Twins for High-Resolution Heat Stress Forecasting in Campus Environments
   Authors: Wenjing Gong, Xinyue Ye, Keshu Wu, Suphanut Jamonnak, Wenyu Zhang, Yifan Yang, Xiao Huang
   Abstract: Extreme heat events exacerbated by climate change pose significant challenges to urban resilience and planning. This study introduces a climate-responsive digital twin framework integrating the Spatiotemporal Vision Transformer (ST-ViT) model to enhance heat stress forecasting and decision-making. Using a Texas campus as a testbed, we synthesized high-resolution physical model simulations with spatial and meteorological data to develop fine-scale human thermal predictions. The ST-ViT-powered digital twin enables efficient, data-driven insights for planners, policymakers, and campus stakeholders, supporting targeted heat mitigation strategies and advancing climate-adaptive urban design.
   Submitted 12 February, 2025; originally announced February 2025.

5. arXiv:2502.08886 [pdf, other] cs.CR (Cryptography and Security); cs.AI (Artificial Intelligence)
   Generative AI for Internet of Things Security: Challenges and Opportunities
   Authors: Yan Lin Aung, Ivan Christian, Ye Dong, Xiaodong Ye, Sudipta Chattopadhyay, Jianying Zhou
   Abstract: As Generative AI (GenAI) continues to gain prominence and utility across various sectors, its integration into the realm of Internet of Things (IoT) security is evolving rapidly. This work examines the state-of-the-art literature and practical applications on how GenAI could improve and be applied to the security landscape of IoT. Our investigation aims to map the current state of GenAI implementation within IoT security, exploring its potential to further fortify security measures. Through the compilation, synthesis, and analysis of the latest advancements in GenAI technologies applied to IoT, this paper not only introduces fresh insights into the field but also lays the groundwork for future research directions. It explains the prevailing challenges within IoT security, discusses the effectiveness of GenAI in addressing these issues, and identifies significant research gaps through MITRE Mitigations. Accompanied by three case studies, we provide a comprehensive overview of the progress and future prospects of GenAI applications in IoT security. This study serves as a foundational resource for improving IoT security through the innovative application of GenAI, contributing to the broader discourse on IoT security and technology integration.
   Submitted 12 February, 2025; originally announced February 2025.

6. arXiv:2502.07307 [pdf, other] cs.IR (Information Retrieval)
   CreAgent: Towards Long-Term Evaluation of Recommender System under Platform-Creator Information Asymmetry
   Authors: Xiaopeng Ye, Chen Xu, Zhongxiang Sun, Jun Xu, Gang Wang, Zhenhua Dong, Ji-Rong Wen
   Abstract: Ensuring the long-term sustainability of recommender systems (RS) has emerged as a crucial issue. Traditional offline evaluation methods for RS typically focus on immediate user feedback, such as clicks, but often neglect the long-term impact of content creators. On real-world content platforms, creators can strategically produce and upload new items based on user feedback and preference trends. While previous studies have attempted to model creator behavior, they often overlook the role of information asymmetry. This asymmetry arises because creators primarily have access to feedback on the items they produce, while platforms possess data on the entire spectrum of user feedback. Current RS simulators, however, fail to account for this asymmetry, leading to inaccurate long-term evaluations. To address this gap, we propose CreAgent, a Large Language Model (LLM)-empowered creator simulation agent. By incorporating game theory's belief mechanism and the fast-and-slow thinking framework, CreAgent effectively simulates creator behavior under conditions of information asymmetry. Additionally, we enhance CreAgent's simulation ability by fine-tuning it with Proximal Policy Optimization (PPO). Our credibility-validation experiments show that CreAgent aligns well with the behavior of real-world platforms and creators, thus improving the reliability of long-term RS evaluations. Moreover, by simulating RS with CreAgents, we can explore how fairness- and diversity-aware RS algorithms contribute to better long-term performance for various stakeholders. CreAgent and the simulation platform are publicly available at https://github.com/shawnye2000/CreAgent.
   Submitted 11 February, 2025; originally announced February 2025.
   ACM Class: H.3.3.

7. arXiv:2502.05798 [pdf, other] cs.AR (Hardware Architecture)
   StreamDCIM: A Tile-based Streaming Digital CIM Accelerator with Mixed-stationary Cross-forwarding Dataflow for Multimodal Transformer
   Authors: Shantian Qin, Ziqing Qiang, Zhihua Fan, Wenming Li, Xuejun An, Xiaochun Ye, Dongrui Fan
   Abstract: Multimodal Transformers are emerging artificial intelligence (AI) models designed to process a mixture of signals from diverse modalities. Digital computing-in-memory (CIM) architectures are considered promising for achieving high efficiency while maintaining high accuracy. However, current digital CIM-based accelerators exhibit inflexibility in microarchitecture, dataflow, and pipeline when accelerating multimodal Transformers. In this paper, we propose StreamDCIM, a tile-based streaming digital CIM accelerator for multimodal Transformers. It overcomes the above challenges with three features: First, we present a tile-based reconfigurable CIM macro microarchitecture with normal and hybrid reconfigurable modes to improve intra-macro CIM utilization. Second, we implement a mixed-stationary cross-forwarding dataflow with tile-based execution decoupling to exploit tile-level computation parallelism. Third, we introduce a ping-pong-like fine-grained compute-rewriting pipeline to overlap high-latency on-chip CIM rewriting. Experimental results show that StreamDCIM outperforms non-streaming and layer-based streaming CIM-based solutions by geometric-mean factors of 2.63× and 1.28× on typical multimodal Transformer models.
   Submitted 9 February, 2025; originally announced February 2025.
   Comments: Accepted by 2025 IEEE International Symposium on Circuits and Systems (ISCAS).

8. arXiv:2502.03493 [pdf, other] eess.IV (Image and Video Processing); cs.CV (Computer Vision and Pattern Recognition)
   MetaFE-DE: Learning Meta Feature Embedding for Depth Estimation from Monocular Endoscopic Images
   Authors: Dawei Lu, Deqiang Xiao, Danni Ai, Jingfan Fan, Tianyu Fu, Yucong Lin, Hong Song, Xujiong Ye, Lei Zhang, Jian Yang
   Abstract: Depth estimation from monocular endoscopic images presents significant challenges due to the complexity of endoscopic surgery, such as the irregular shapes of human soft tissues and variations in lighting conditions. Existing methods primarily estimate depth information from RGB images directly, and often suffer from limited interpretability and accuracy. Given that RGB and depth images are two views of the same endoscopic surgery scene, in this paper we introduce a novel concept referred to as "meta feature embedding (MetaFE)", in which the physical entities (e.g., tissues and surgical instruments) of endoscopic surgery are represented using shared features that can be alternatively decoded into RGB or depth images. With this concept, we propose a two-stage self-supervised learning paradigm for monocular endoscopic depth estimation. In the first stage, we propose a temporal representation learner using diffusion models, which are aligned with spatial information through cross normalization to construct the MetaFE. In the second stage, self-supervised monocular depth estimation with brightness calibration is applied to decode the meta features into the depth image. Extensive evaluation on diverse endoscopic datasets demonstrates that our approach outperforms the state-of-the-art methods in depth estimation, achieving superior accuracy and generalization. The source code will be publicly available.
   Submitted 4 February, 2025; originally announced February 2025.
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00439">arXiv:2502.00439</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.00439">pdf</a>, <a href="https://arxiv.org/format/2502.00439">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> UniAttn: Reducing Inference Costs via Softmax Unification for Post-Training LLMs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Xiong%2C+Y">Yizhe Xiong</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+W">Wei Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Ye%2C+X">Xin Ye</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+H">Hui Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Lin%2C+Z">Zijia Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Lian%2C+H">Haoran Lian</a>, <a href="/search/cs?searchtype=author&amp;query=Su%2C+Z">Zhenpeng Su</a>, <a href="/search/cs?searchtype=author&amp;query=Han%2C+J">Jungong Han</a>, <a href="/search/cs?searchtype=author&amp;query=Ding%2C+G">Guiguang Ding</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.00439v1-abstract-short" style="display: inline;"> Post-training is essential for adapting Large Language Models (LLMs) to real-world applications. Deploying post-trained models faces significant challenges due to substantial memory overhead and noticeable inference latency. Existing work has identified significant redundancies in LLMs and proposed efficient architectures, namely intra-layer KV sharing and cross-layer KV sharing. However, intra-la&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00439v1-abstract-full').style.display = 'inline'; document.getElementById('2502.00439v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.00439v1-abstract-full" style="display: none;"> Post-training is essential for adapting Large Language Models (LLMs) to real-world applications. Deploying post-trained models faces significant challenges due to substantial memory overhead and noticeable inference latency. Existing work has identified significant redundancies in LLMs and proposed efficient architectures, namely intra-layer KV sharing and cross-layer KV sharing. However, intra-layer KV sharing still results in high inference costs, while cross-layer KV sharing leads to significant performance degradation. As a result, both methods remain suboptimal for post-training pre-trained LLMs. In this paper, we identify that the \texttt{Softmax} operation is a primary bottleneck for LLM inference and discover that it is actually highly redundant during post-training. We propose Softmax \textbf{Uni}fication in \textbf{Att}e\textbf{n}tion (\textbf{UniAttn}), a novel post-training method that unifies Softmax activations across transformer blocks to reduce LLM inference costs. Additionally, UniAttn adopts a linear projection to compensate for the errors induced by Softmax unification. 

10. arXiv:2502.00245 [pdf, other] cs.LG (Machine Learning)
    Contrastive Private Data Synthesis via Weighted Multi-PLM Fusion
    Authors: Tianyuan Zou, Yang Liu, Peng Li, Yufei Xiong, Jianqing Zhang, Jingjing Liu, Xiaozhou Ye, Ye Ouyang, Ya-Qin Zhang
    Abstract: Substantial quantity and high quality are the golden rules for a good training dataset, with sample privacy protection equally important. Generating synthetic samples that resemble high-quality private data while ensuring Differential Privacy (DP), a formal privacy guarantee, promises scalability and practicality. However, existing methods that rely on pre-trained models for data synthesis often struggle in data-deficient scenarios, suffering from limited sample size, inevitable generation noise, and pre-trained model bias. To address these challenges, we propose a novel contrAstive private data Synthesis via Weighted multiple Pre-trained language models (PLMs) framework, named WASP. WASP utilizes limited private samples for more accurate private data distribution estimation via a Top-Q voting mechanism, and leverages low-quality synthetic samples for contrastive generation via collaboration among dynamically weighted multiple pre-trained models. Extensive experiments on 6 well-developed datasets with 6 open-source and 3 closed-source PLMs demonstrate the superiority of WASP in improving model performance over diverse downstream tasks. Code is available at https://anonymous.4open.science/r/WASP.
    Submitted 31 January, 2025; originally announced February 2025.
    Comments: 16 pages, 11 tables, 7 figures.
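
To make the weighted-voting flavor concrete, here is a deliberately generic sketch: each private sample votes for its Q most similar synthetic candidates, votes are aggregated across PLMs with per-model weights, and the top-voted candidates are kept. The shapes and names are assumptions for illustration only; the abstract does not specify the actual mechanics of Top-Q voting.

```python
# Generic weighted Top-Q voting sketch (an assumption-laden illustration, not
# the WASP specification): private samples vote for their Q closest synthetic
# candidates; votes are combined across models with per-model weights.
import numpy as np

def top_q_votes(similarity: np.ndarray, q: int) -> np.ndarray:
    """similarity: (n_private, n_candidates). Each private sample casts one
    vote for each of its q most similar candidates."""
    votes = np.zeros(similarity.shape[1])
    for row in similarity:
        votes[np.argsort(row)[-q:]] += 1
    return votes

def select(sims_per_model, model_weights, q=3, keep=5):
    """Weighted aggregation of votes over candidates scored by several PLMs."""
    total = sum(w * top_q_votes(s, q)
                for w, s in zip(model_weights, sims_per_model))
    return np.argsort(total)[-keep:]  # indices of the winning candidates

rng = np.random.default_rng(0)
sims = [rng.random((10, 20)) for _ in range(3)]  # 3 models, 10 private, 20 cand.
print(select(sims, model_weights=[0.5, 0.3, 0.2]))
```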

11. arXiv:2501.15830 [pdf, other] cs.RO (Robotics); cs.AI (Artificial Intelligence)
    SpatialVLA: Exploring Spatial Representations for Visual-Language-Action Model
    Authors: Delin Qu, Haoming Song, Qizhi Chen, Yuanqi Yao, Xinyi Ye, Yan Ding, Zhigang Wang, JiaYuan Gu, Bin Zhao, Dong Wang, Xuelong Li
    Abstract: In this paper, we claim that spatial understanding is the key to robot manipulation, and propose SpatialVLA to explore effective spatial representations for the robot foundation model. Specifically, we introduce Ego3D Position Encoding to inject 3D information into the input observations of the visual-language-action model, and propose Adaptive Action Grids to represent spatial robot movement actions with adaptively discretized action grids, facilitating the learning of generalizable and transferable spatial action knowledge for cross-robot control. SpatialVLA is first pre-trained on top of a vision-language model with 1.1 million real-world robot episodes, to learn a generalist manipulation policy across multiple robot environments and tasks. After pre-training, SpatialVLA is directly applied to perform numerous tasks in a zero-shot manner. The superior results in both simulation and on real-world robots demonstrate its advantage in inferring complex robot motion trajectories and its strong in-domain multi-task generalization ability. We further show that the proposed Adaptive Action Grids offer a new and effective way to fine-tune the pre-trained SpatialVLA model for new simulation and real-world setups, where the pre-learned action grids are re-discretized to capture robot-specific spatial action movements of the new setups. The superior results from extensive evaluations demonstrate exceptional in-distribution generalization and out-of-distribution adaptation capability, highlighting the crucial benefit of the proposed spatially aware representations for generalist robot policy learning. All details and code will be open-sourced.
    Submitted 30 January, 2025; v1 submitted 27 January, 2025; originally announced January 2025.
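
One plausible reading of "adaptively discretized action grids" is quantile-based binning: bin edges follow the empirical action distribution, so frequent motions get finer resolution, and re-discretizing for a new robot simply means refitting the edges on that robot's data. The sketch below encodes and decodes one action dimension under that assumption; it is not the released SpatialVLA code.

```python
# Quantile-based discretization of a continuous action dimension: a hedged
# reading of "Adaptive Action Grids", not the released SpatialVLA code.
import numpy as np

def fit_grid(actions: np.ndarray, n_bins: int) -> np.ndarray:
    """Place bin edges at quantiles of observed actions, so dense regions of
    the action distribution get finer resolution. Returns n_bins + 1 edges."""
    return np.quantile(actions, np.linspace(0.0, 1.0, n_bins + 1))

def encode(a: float, edges: np.ndarray) -> int:
    """Map a continuous action to its grid token (bin index)."""
    return int(np.clip(np.searchsorted(edges, a) - 1, 0, len(edges) - 2))

def decode(token: int, edges: np.ndarray) -> float:
    """Map a grid token back to a representative action (bin center)."""
    return 0.5 * (edges[token] + edges[token + 1])

# "Re-discretization" for a new robot = refit the edges on that robot's data.
edges = fit_grid(np.random.default_rng(0).normal(0, 1, 10_000), n_bins=32)
print(decode(encode(0.37, edges), edges))
```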
arXiv:2501.14548 [cs.CV]
https://arxiv.org/abs/2501.14548
Title: Large-scale and Fine-grained Vision-language Pre-training for Enhanced CT Image Understanding
Authors: Zhongyi Shui, Jianpeng Zhang, Weiwei Cao, Sinuo Wang, Ruizhe Guo, Le Lu, Lin Yang, Xianghua Ye, Tingbo Liang, Qi Zhang, Ling Zhang
Abstract: Artificial intelligence (AI) shows great potential in assisting radiologists to improve the efficiency and accuracy of medical image interpretation and diagnosis. However, a versatile AI model requires large-scale data and comprehensive annotations, which are often impractical in medical settings. Recent studies leverage radiology reports as a naturally high-quality supervision for medical images, using contrastive language-image pre-training (CLIP) to develop language-informed models for radiological image interpretation. Nonetheless, these approaches typically contrast entire images with reports, neglecting the local associations between imaging regions and report sentences, which may undermine model performance and interpretability. In this paper, we propose a fine-grained vision-language model (fVLM) for anatomy-level CT image interpretation. Specifically, we explicitly match anatomical regions of CT images with the corresponding descriptions in radiology reports and perform contrastive pre-training for each anatomy individually. Fine-grained alignment, however, faces considerable false-negative challenges, mainly from the abundance of anatomy-level healthy samples and similarly diseased abnormalities. To tackle this issue, we propose identifying false negatives of both normal and abnormal samples and calibrating contrastive learning from patient-level to disease-aware pairing. We curated the largest CT dataset to date, comprising imaging and report data from 69,086 patients, and conducted a comprehensive evaluation of 54 major and important disease diagnosis tasks across 15 main anatomies. Experimental results demonstrate the substantial potential of fVLM in versatile medical image interpretation. In the zero-shot classification task, we achieved an average AUC of 81.3% on 54 diagnosis tasks, surpassing CLIP and supervised methods by 12.9% and 8.0%, respectively.
Submitted 24 January, 2025; originally announced January 2025.
Comments: Accepted by ICLR 2025

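The "disease-aware pairing" calibration can be pictured as an InfoNCE loss in which same-disease pairs are masked out of the negatives; this generic sketch assumes the shapes, the temperature, and the masking rule, and is not fVLM's exact objective:

```python
import torch
import torch.nn.functional as F

def disease_aware_infonce(img_emb, txt_emb, disease_ids, tau=0.07):
    """img_emb, txt_emb: (N, d) embeddings of one anatomy across N patients;
    disease_ids: (N,) labels used to drop likely false negatives."""
    img_emb, txt_emb = F.normalize(img_emb, dim=1), F.normalize(txt_emb, dim=1)
    logits = img_emb @ txt_emb.T / tau                        # (N, N) similarities
    same_disease = disease_ids[:, None] == disease_ids[None, :]
    mask = same_disease & ~torch.eye(len(disease_ids), dtype=torch.bool)
    logits = logits.masked_fill(mask, float("-inf"))          # not negatives anymore
    target = torch.arange(len(disease_ids))                   # diagonal = positives
    return F.cross_entropy(logits, target)

loss = disease_aware_infonce(torch.randn(8, 128), torch.randn(8, 128),
                             torch.randint(0, 3, (8,)))
print(loss.item())
```
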
arXiv:2501.13072 [cs.RO, cs.AI]
https://arxiv.org/abs/2501.13072
Title: AdaWM: Adaptive World Model based Planning for Autonomous Driving
Authors: Hang Wang, Xin Ye, Feng Tao, Chenbin Pan, Abhirup Mallik, Burhaneddin Yaman, Liu Ren, Junshan Zhang
Abstract: World model based reinforcement learning (RL) has emerged as a promising approach for autonomous driving, which learns a latent dynamics model and uses it to train a planning policy. To speed up the learning process, the pretrain-finetune paradigm is often used, where online RL is initialized by a pretrained model and a policy learned offline. However, naively performing such initialization in RL may result in dramatic performance degradation during the online interactions in the new task. To tackle this challenge, we first analyze the performance degradation and identify two primary root causes therein: the mismatch of the planning policy and the mismatch of the dynamics model, due to distribution shift. We further analyze the effects of these factors on performance degradation during finetuning, and our findings reveal that the choice of finetuning strategies plays a pivotal role in mitigating these effects. We then introduce AdaWM, an Adaptive World Model based planning method, featuring two key steps: (a) mismatch identification, which quantifies the mismatches and informs the finetuning strategy, and (b) alignment-driven finetuning, which selectively updates either the policy or the model as needed using efficient low-rank updates. Extensive experiments on the challenging CARLA driving tasks demonstrate that AdaWM significantly improves the finetuning process, resulting in more robust and efficient performance in autonomous driving systems.
Submitted 22 January, 2025; v1 submitted 22 January, 2025; originally announced January 2025.
Comments: ICLR 2025

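A minimal sketch of the mismatch-identification step, assuming dynamics mismatch is read off one-step prediction error and policy mismatch off a Monte-Carlo KL estimate; the estimators and the thresholding rule are illustrative, not the paper's exact quantities:

```python
import numpy as np

def dynamics_mismatch(model, transitions):
    # mean one-step prediction error on transitions collected in the new task
    return float(np.mean([np.linalg.norm(model(s, a) - s_next)
                          for s, a, s_next in transitions]))

def policy_mismatch(logp_old, logp_new):
    # crude Monte-Carlo KL estimate between pretrained and current action dists
    return float(np.mean(logp_old - logp_new))

def choose_update(dyn_err, pol_kl, ratio=1.0):
    # finetune whichever mismatch dominates (selective update, as in step (b))
    return "finetune_dynamics" if dyn_err > ratio * pol_kl else "finetune_policy"

# toy usage with a hypothetical linear latent model
model = lambda s, a: 0.9 * s + 0.1 * a
trans = [(np.ones(4), np.zeros(4), np.ones(4)) for _ in range(8)]
print(choose_update(dynamics_mismatch(model, trans),
                    policy_mismatch(np.log(0.3), np.log(0.25))))
```
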
arXiv:2501.11283 [cs.IT]
https://arxiv.org/abs/2501.11283
Title: Large Language Model Agents for Radio Map Generation and Wireless Network Planning
Authors: Hongye Quan, Wanli Ni, Tong Zhang, Xiangyu Ye, Ziyi Xie, Shuai Wang, Yuanwei Liu, Hui Song
Abstract: Using commercial software for radio map generation and wireless network planning often requires complex manual operations, posing significant challenges in terms of scalability, adaptability, and user-friendliness. To address these issues, we propose an automated solution that employs large language model (LLM) agents. These agents are designed to autonomously generate radio maps and facilitate wireless network planning for specified areas, thereby minimizing the necessity for extensive manual intervention. To validate the effectiveness of our proposed solution, we develop a software platform that integrates LLM agents. Experimental results demonstrate that a large number of manual operations can be saved via the proposed LLM agents, and the automated solution can achieve enhanced coverage and signal-to-interference-plus-noise ratio (SINR), especially in urban environments.
Submitted 13 February, 2025; v1 submitted 20 January, 2025; originally announced January 2025.
Comments: 5 pages, 7 figures

arXiv:2501.05414 [cs.CL]
https://arxiv.org/abs/2501.05414
Title: LongProc: Benchmarking Long-Context Language Models on Long Procedural Generation
Authors: Xi Ye, Fangcong Yin, Yinghui He, Joie Zhang, Howard Yen, Tianyu Gao, Greg Durrett, Danqi Chen
Abstract: Existing benchmarks for evaluating long-context language models (LCLMs) primarily focus on long-context recall, requiring models to produce short responses based on a few critical snippets while processing thousands of irrelevant tokens. We introduce LongProc (Long Procedural Generation), a new benchmark that requires both the integration of highly dispersed information and long-form generation.
LongProc consists of six diverse procedural generation tasks, such as extracting structured information from HTML pages into a TSV format and executing complex search procedures to create travel plans. These tasks challenge LCLMs by testing their ability to follow detailed procedural instructions, synthesize and reason over dispersed information, and generate structured, long-form outputs (up to 8K tokens). Furthermore, as these tasks adhere to deterministic procedures and yield structured outputs, they enable reliable rule-based evaluation. We evaluate 17 LCLMs on LongProc across three difficulty levels, with maximum numbers of output tokens set at 500, 2K, and 8K. Notably, while all tested models claim a context window size above 32K tokens, open-weight models typically falter on 2K-token tasks, and closed-source models like GPT-4o show significant degradation on 8K-token tasks. Further analysis reveals that LCLMs struggle to maintain long-range coherence in long-form generations. These findings highlight critical limitations in current LCLMs and suggest substantial room for improvement. Data and code available at: https://princeton-pli.github.io/LongProc
Submitted 9 January, 2025; originally announced January 2025.

href="/search/cs?searchtype=author&amp;query=Chen%2C+K">Kai Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.03905v1-abstract-short" style="display: inline;"> Mixture-of-Expert (MoE) models outperform conventional models by selectively activating different subnets, named \emph{experts}, on a per-token basis. This gated computation generates dynamic communications that cannot be determined beforehand, challenging the existing GPU interconnects that remain \emph{static} during the distributed training process. In this paper, we advocate for a first-of-its&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.03905v1-abstract-full').style.display = 'inline'; document.getElementById('2501.03905v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.03905v1-abstract-full" style="display: none;"> Mixture-of-Expert (MoE) models outperform conventional models by selectively activating different subnets, named \emph{experts}, on a per-token basis. This gated computation generates dynamic communications that cannot be determined beforehand, challenging the existing GPU interconnects that remain \emph{static} during the distributed training process. In this paper, we advocate for a first-of-its-kind system, called mFabric, that unlocks topology reconfiguration \emph{during} distributed MoE training. Towards this vision, we first perform a production measurement study and show that the MoE dynamic communication pattern has \emph{strong locality}, alleviating the requirement of global reconfiguration. Based on this, we design and implement a \emph{regionally reconfigurable high-bandwidth domain} on top of existing electrical interconnects using optical circuit switching (OCS), achieving scalability while maintaining rapid adaptability. We have built a fully functional mFabric prototype with commodity hardware and a customized collective communication runtime that trains state-of-the-art MoE models with \emph{in-training} topology reconfiguration across 32 A100 GPUs. Large-scale packet-level simulations show that mFabric delivers comparable performance as the non-blocking fat-tree fabric while boosting the training cost efficiency (e.g., performance per dollar) of four representative MoE models by 1.2$\times$--1.5$\times$ and 1.9$\times$--2.3$\times$ at 100 Gbps and 400 Gbps link bandwidths, respectively. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.03905v1-abstract-full').style.display = 'none'; document.getElementById('2501.03905v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Corresponding authors: zhizhenz@mit.edu (Z. Zhong), kaichen@cse.ust.hk (K. 
Chen)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.19127">arXiv:2412.19127</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.19127">pdf</a>, <a href="https://arxiv.org/format/2412.19127">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> SDRS: Shape-Differentiable Robot Simulator </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Ye%2C+X">Xiaohan Ye</a>, <a href="/search/cs?searchtype=author&amp;query=Gao%2C+X">Xifeng Gao</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+K">Kui Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Pan%2C+Z">Zherong Pan</a>, <a href="/search/cs?searchtype=author&amp;query=Komura%2C+T">Taku Komura</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.19127v1-abstract-short" style="display: inline;"> Robot simulators are indispensable tools across many fields, and recent research has significantly improved their functionality by incorporating additional gradient information. However, existing differentiable robot simulators suffer from non-differentiable singularities, when robots undergo substantial shape changes. To address this, we present the Shape-Differentiable Robot Simulator (SDRS), de&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.19127v1-abstract-full').style.display = 'inline'; document.getElementById('2412.19127v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.19127v1-abstract-full" style="display: none;"> Robot simulators are indispensable tools across many fields, and recent research has significantly improved their functionality by incorporating additional gradient information. However, existing differentiable robot simulators suffer from non-differentiable singularities, when robots undergo substantial shape changes. To address this, we present the Shape-Differentiable Robot Simulator (SDRS), designed to be differentiable under significant robot shape changes. The core innovation of SDRS lies in its representation of robot shapes using a set of convex polyhedrons. This approach allows us to generalize smooth, penalty-based contact mechanics for interactions between any pair of convex polyhedrons. Using the separating hyperplane theorem, SDRS introduces a separating plane for each pair of contacting convex polyhedrons. This separating plane functions as a zero-mass auxiliary entity, with its state determined by the principle of least action. This setup ensures global differentiability, even as robot shapes undergo significant geometric and topological changes. To demonstrate the practical value of SDRS, we provide examples of robot co-design scenarios, where both robot shapes and control movements are optimized simultaneously. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.19127v1-abstract-full').style.display = 'none'; document.getElementById('2412.19127v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.18856">arXiv:2412.18856</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.18856">pdf</a>, <a href="https://arxiv.org/ps/2412.18856">ps</a>, <a href="https://arxiv.org/format/2412.18856">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Digital Twin Enhanced Deep Reinforcement Learning for Intelligent Omni-Surface Configurations in MU-MIMO Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Ye%2C+X">Xiaowen Ye</a>, <a href="/search/cs?searchtype=author&amp;query=Yu%2C+X">Xianghao Yu</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+L">Liqun Fu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.18856v1-abstract-short" style="display: inline;"> Intelligent omni-surface (IOS) is a promising technique to enhance the capacity of wireless networks, by reflecting and refracting the incident signal simultaneously. Traditional IOS configuration schemes, relying on all sub-channels&#39; channel state information and user equipments&#39; mobility, are difficult to implement in complex realistic systems. Existing works attempt to address this issue employ&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.18856v1-abstract-full').style.display = 'inline'; document.getElementById('2412.18856v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.18856v1-abstract-full" style="display: none;"> Intelligent omni-surface (IOS) is a promising technique to enhance the capacity of wireless networks, by reflecting and refracting the incident signal simultaneously. Traditional IOS configuration schemes, relying on all sub-channels&#39; channel state information and user equipments&#39; mobility, are difficult to implement in complex realistic systems. Existing works attempt to address this issue employing deep reinforcement learning (DRL), but this method requires a lot of trial-and-error interactions with the external environment for efficient results and thus cannot satisfy the real-time decision-making. To enable model-free and real-time IOS control, this paper puts forth a new framework that integrates DRL and digital twins. DeepIOS, a DRL based IOS configuration scheme with the goal of maximizing the sum data rate, is first developed to jointly optimize the phase-shift and amplitude of IOS in multi-user multiple-input-multiple-output systems. 
arXiv:2412.18856 [cs.NI, eess.SP]
https://arxiv.org/abs/2412.18856
Title: Digital Twin Enhanced Deep Reinforcement Learning for Intelligent Omni-Surface Configurations in MU-MIMO Systems
Authors: Xiaowen Ye, Xianghao Yu, Liqun Fu
Abstract: Intelligent omni-surface (IOS) is a promising technique to enhance the capacity of wireless networks by reflecting and refracting the incident signal simultaneously. Traditional IOS configuration schemes, relying on all sub-channels' channel state information and user equipments' mobility, are difficult to implement in complex realistic systems. Existing works attempt to address this issue by employing deep reinforcement learning (DRL), but this method requires a lot of trial-and-error interactions with the external environment for efficient results and thus cannot satisfy real-time decision-making. To enable model-free and real-time IOS control, this paper puts forth a new framework that integrates DRL and digital twins. DeepIOS, a DRL-based IOS configuration scheme with the goal of maximizing the sum data rate, is first developed to jointly optimize the phase-shift and amplitude of the IOS in multi-user multiple-input-multiple-output systems. Thereafter, to further reduce the computational complexity, DeepIOS introduces an action branch architecture, which separately decides the two optimization variables in parallel. Finally, a digital twin module is constructed through supervised learning as a pre-verification platform for DeepIOS, such that real-time decision-making can be guaranteed. The formulated framework is a closed-loop system, in which the physical space provides data to establish and calibrate the digital space, while the digital space generates experience samples for DeepIOS training and sends the trained parameters to the IOS controller for configuration. Numerical results show that compared with random and MAB schemes, the proposed framework attains a higher data rate and is more robust to different settings. Furthermore, the action branch architecture reduces DeepIOS's computational complexity, and the digital twin module improves the convergence speed and run-time.
Submitted 25 December, 2024; originally announced December 2024.
Comments: Published in IEEE JIoT. Due to the limitation "The abstract field cannot be longer than 1,920 characters", the abstract appearing above is slightly shorter than that in the main PDF file.

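The "action branch architecture" reads like a shared trunk with two parallel heads, one per optimization variable, so the output space scales as |phases| + |amplitudes| rather than their product; the sizes below are made up for illustration:

```python
import torch
import torch.nn as nn

class ActionBranchQNet(nn.Module):
    """Two parallel heads decide the phase-shift and amplitude in parallel."""
    def __init__(self, obs_dim=32, n_phase=16, n_amp=8, hidden=128):
        super().__init__()
        self.trunk = nn.Sequential(nn.Linear(obs_dim, hidden), nn.ReLU())
        self.phase_head = nn.Linear(hidden, n_phase)  # Q-values per phase-shift
        self.amp_head = nn.Linear(hidden, n_amp)      # Q-values per amplitude

    def forward(self, obs):
        h = self.trunk(obs)
        return self.phase_head(h), self.amp_head(h)

net = ActionBranchQNet()
q_phase, q_amp = net(torch.randn(4, 32))
action = (q_phase.argmax(dim=1), q_amp.argmax(dim=1))  # decided independently
print(action[0].shape, action[1].shape)
```
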
arXiv:2412.16485 [cs.DS]
https://arxiv.org/abs/2412.16485
Title: Fast Biclique Counting on Bipartite Graphs: A Node Pivot-based Approach
Authors: Xiaowei Ye, Rong-Hua Li, Longlong Lin, Shaojie Qiao, Guoren Wang
Abstract: Counting the number of $(p, q)$-bicliques (complete bipartite subgraphs) in a bipartite graph is a fundamental problem which plays a crucial role in numerous bipartite graph analysis applications. However, existing algorithms for counting $(p, q)$-bicliques often face significant computational challenges, particularly on large real-world networks. In this paper, we propose a general biclique counting framework, called npivot, based on a novel concept of node-pivot. We show that previous methods can be viewed as specific implementations of this general framework. More importantly, we propose a novel implementation of npivot based on a carefully-designed minimum non-neighbor candidate partition strategy. We prove that our new implementation of npivot has a lower worst-case time complexity than the state-of-the-art methods. Beyond basic biclique counting, a nice feature of npivot is that it also supports local counting (computing bicliques per node) and range counting (simultaneously counting bicliques within a size range). Extensive experiments on 12 real-world large datasets demonstrate that our proposed npivot substantially outperforms state-of-the-art algorithms by up to two orders of magnitude.
Submitted 20 December, 2024; originally announced December 2024.

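For readers unfamiliar with the object being counted, a brute-force $(p, q)$-biclique counter for toy graphs is shown below; npivot's pivot-based algorithm is far more sophisticated and is not reproduced here:

```python
from itertools import combinations
from math import comb

def count_pq_bicliques(adj, p, q):
    """adj maps each left node to its set of right-side neighbors."""
    count = 0
    for us in combinations(adj, p):                        # choose p left nodes
        common = set.intersection(*(adj[u] for u in us))   # shared right neighbors
        count += comb(len(common), q)                      # each q-subset completes one
    return count

# toy bipartite graph: left {a, b, c}, right {1, 2, 3}
adj = {"a": {1, 2, 3}, "b": {1, 2}, "c": {2, 3}}
print(count_pq_bicliques(adj, 2, 2))  # (a,b)x{1,2} and (a,c)x{2,3} -> 2
```
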
arXiv:2412.16085 [eess.IV, cs.CV]
https://arxiv.org/abs/2412.16085
Title: Efficient MedSAMs: Segment Anything in Medical Images on Laptop
Authors: Jun Ma, Feifei Li, Sumin Kim, Reza Asakereh, Bao-Hiep Le, Dang-Khoa Nguyen-Vu, Alexander Pfefferle, Muxin Wei, Ruochen Gao, Donghang Lyu, Songxiao Yang, Lennart Purucker, Zdravko Marinov, Marius Staring, Haisheng Lu, Thuy Thanh Dao, Xincheng Ye, Zhi Li, Gianluca Brugnara, Philipp Vollmuth, Martha Foltyn-Dumitru, Jaeyoung Cho, Mustafa Ahmed Mahmutoglu, Martin Bendszus, Irada Pflüger, et al. (57 additional authors not shown)
Abstract: Promptable segmentation foundation models have emerged as a transformative approach to addressing the diverse needs in medical images, but most existing models require expensive computing, posing a big barrier to their adoption in clinical practice. In this work, we organized the first international competition dedicated to promptable medical image segmentation, featuring a large-scale dataset spanning nine common imaging modalities from over 20 different institutions. The top teams developed lightweight segmentation foundation models and implemented an efficient inference pipeline that substantially reduced computational requirements while maintaining state-of-the-art segmentation accuracy. Moreover, the post-challenge phase advanced the algorithms through the design of performance booster and reproducibility tasks, resulting in improved algorithms and validated reproducibility of the winning solution. Furthermore, the best-performing algorithms have been incorporated into the open-source software with a user-friendly interface to facilitate clinical adoption. The data and code are publicly available to foster the further development of medical image segmentation foundation models and pave the way for impactful real-world applications.
Submitted 20 December, 2024; originally announced December 2024.
Comments: CVPR 2024 MedSAM on Laptop Competition Summary: https://www.codabench.org/competitions/1847/

arXiv:2412.11983 [cs.LG, cs.AI]
https://arxiv.org/abs/2412.11983
Title: Cost-Effective Label-free Node Classification with LLMs
Authors: Taiyan Zhang, Renchi Yang, Mingyu Yan, Xiaochun Ye, Dongrui Fan, Yurui Lai
Abstract: Graph neural networks (GNNs) have emerged as go-to models for node classification in graph data due to their powerful abilities in fusing graph structures and attributes. However, such models strongly rely on adequate high-quality labeled data for training, which are expensive to acquire in practice. With the advent of large language models (LLMs), a promising way is to leverage their superb zero-shot capabilities and massive knowledge for node labeling. Despite promising results reported, this methodology either demands considerable queries to LLMs, or suffers from compromised performance caused by noisy labels produced by LLMs. To remedy these issues, this work presents Cella, an active self-training framework that integrates LLMs into GNNs in a cost-effective manner. The design recipe of Cella is to iteratively identify small sets of "critical" samples using GNNs and extract informative pseudo-labels for them with both LLMs and GNNs as additional supervision signals to enhance model training. Particularly, Cella includes three major components: (i) an effective active node selection strategy for initial annotations; (ii) a judicious sample selection scheme to sift out the "critical" nodes based on label disharmonicity and entropy; and (iii) a label refinement module combining LLMs and GNNs with rewired topology. Our extensive experiments over five benchmark text-attributed graph datasets demonstrate that Cella significantly outperforms the state of the art under the same query budget to LLMs in terms of label-free node classification. In particular, on the DBLP dataset with 14.3k nodes, Cella achieves a conspicuous 8.08% improvement in accuracy over the state-of-the-art at a cost of less than one cent.
Submitted 16 December, 2024; originally announced December 2024.
Comments: 15 pages, 5 figures

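The iterative recipe can be skeletonized as an active self-training loop; selecting by prediction entropy alone, as below, is a simplification of the paper's disharmonicity-plus-entropy criterion, and llm_label is a hypothetical stand-in for the LLM query:

```python
import numpy as np

def entropy(p):
    return -(p * np.log(p + 1e-12)).sum(axis=-1)

def active_self_training(train_gnn, predict, llm_label, nodes, rounds=3, budget=10):
    pseudo = {}                                  # node -> pseudo-label
    for _ in range(rounds):
        probs = predict(pseudo)                  # (N, C) current GNN predictions
        pool = [v for v in nodes if v not in pseudo]
        critical = sorted(pool, key=lambda v: entropy(probs[v]), reverse=True)[:budget]
        for v in critical:
            pseudo[v] = llm_label(v)             # one LLM query per critical node
        train_gnn(pseudo)                        # pseudo-labels as extra supervision
    return pseudo

# toy stand-ins so the loop runs end to end
rng = np.random.default_rng(0)
N, C = 50, 3
pseudo = active_self_training(lambda labels: None,
                              lambda labels: rng.dirichlet(np.ones(C), size=N),
                              lambda v: v % C, range(N))
print(len(pseudo))  # 3 rounds x 10 queries = 30 pseudo-labeled nodes
```
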
arXiv:2412.08941 [cs.LG, cs.CV]
https://arxiv.org/abs/2412.08941
Title: Optimized Gradient Clipping for Noisy Label Learning
Authors: Xichen Ye, Yifan Wu, Weizhong Zhang, Xiaoqiang Li, Yifan Chen, Cheng Jin
Abstract: Previous research has shown that constraining the gradient of the loss function with respect to model-predicted probabilities can enhance model robustness against noisy labels. These methods typically specify a fixed optimal threshold for gradient clipping through validation data to obtain the desired robustness against noise. However, this common practice overlooks the dynamic distribution of gradients from both clean and noisy-labeled samples at different stages of training, significantly limiting the model's capability to adapt to the variable nature of gradients throughout the training process. To address this issue, we propose a simple yet effective approach called Optimized Gradient Clipping (OGC), which dynamically adjusts the clipping threshold based on the ratio of noise gradients to clean gradients after clipping, estimated by modeling the distributions of clean and noisy samples. This approach allows us to modify the clipping threshold at each training step, effectively controlling the influence of noise gradients. Additionally, we provide statistical analysis to certify the noise-tolerance ability of OGC. Our extensive experiments across various types of label noise, including symmetric, asymmetric, instance-dependent, and real-world noise, demonstrate the effectiveness of our approach.
Submitted 22 December, 2024; v1 submitted 12 December, 2024; originally announced December 2024.
Comments: Accepted by AAAI2025
MSC Class: 68T07; 68T10. ACM Class: I.2.6; I.5.1; I.2.7

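The threshold rule can be mimicked on toy data: model clean and noisy gradient magnitudes as two populations and pick the largest threshold that keeps the post-clipping noise-to-clean ratio under a target; the two-population split and the grid search below are illustrative assumptions, not OGC's estimator:

```python
import numpy as np

rng = np.random.default_rng(0)
# toy per-sample gradient magnitudes: clean ones small, noisy-label ones large
grads = np.concatenate([rng.gamma(2.0, 0.05, 900), rng.gamma(8.0, 0.20, 100)])
is_noisy_guess = grads > np.quantile(grads, 0.9)   # crude split into two groups

def noise_ratio(tau):
    """Noise-to-clean gradient mass after clipping at threshold tau."""
    clipped = np.minimum(grads, tau)
    return clipped[is_noisy_guess].sum() / clipped[~is_noisy_guess].sum()

# largest threshold whose estimated noise/clean ratio stays below a target
taus = np.linspace(grads.min(), grads.max(), 200)
ok = [t for t in taus if noise_ratio(t) <= 0.25]
tau_star = max(ok) if ok else taus[0]
print(f"clipping threshold ~ {tau_star:.3f}, ratio {noise_ratio(tau_star):.3f}")
```

Recomputing tau_star each training step, as the abstract describes, would let the threshold track the evolving gradient distributions.
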
arXiv:2412.07256 [eess.IV, cs.CV]
https://arxiv.org/abs/2412.07256; DOI: https://doi.org/10.1109/TIP.2024.3515873
Title: Modeling Dual-Exposure Quad-Bayer Patterns for Joint Denoising and Deblurring
Authors: Yuzhi Zhao, Lai-Man Po, Xin Ye, Yongzhe Xu, Qiong Yan
Abstract: Image degradation caused by noise and blur remains a persistent challenge in imaging systems, stemming from limitations in both hardware and methodology. Single-image solutions face an inherent tradeoff between noise reduction and motion blur: while short exposures can capture clear motion, they suffer from noise amplification, and long exposures reduce noise but introduce blur. Learning-based single-image enhancers tend to produce over-smoothed results due to the limited information. Multi-image solutions using burst mode avoid this tradeoff by capturing more spatial-temporal information, but often struggle with misalignment from camera/scene motion. To address these limitations, we propose a physical-model-based image restoration approach leveraging a novel dual-exposure Quad-Bayer pattern sensor. By capturing pairs of short and long exposures at the same starting point but with varying durations, this method integrates complementary noise-blur information within a single image. We further introduce a Quad-Bayer synthesis method (B2QB) to simulate sensor data from Bayer patterns to facilitate training. Based on this dual-exposure sensor model, we design a hierarchical convolutional neural network called QRNet to recover high-quality RGB images. The network incorporates input enhancement blocks and multi-level feature extraction to improve restoration quality. Experiments demonstrate superior performance over state-of-the-art deblurring and denoising methods on both synthetic and real-world datasets. The code, model, and datasets are publicly available at https://github.com/zhaoyuzhi/QRNet.
Submitted 10 December, 2024; originally announced December 2024.
Comments: Accepted by IEEE Transactions on Image Processing (TIP)

arXiv:2412.04074 [cs.NI, cs.LG]
https://arxiv.org/abs/2412.04074
Title: Integrated Sensing and Communications for Low-Altitude Economy: A Deep Reinforcement Learning Approach
Authors: Xiaowen Ye, Yuyi Mao, Xianghao Yu, Shu Sun, Liqun Fu, Jie Xu
Abstract: This paper studies an integrated sensing and communications (ISAC) system for the low-altitude economy (LAE), where a ground base station (GBS) provides communication and navigation services for authorized unmanned aerial vehicles (UAVs), while sensing the low-altitude airspace to monitor an unauthorized mobile target. The expected communication sum-rate over a given flight period is maximized by jointly optimizing the beamforming at the GBS and the UAVs' trajectories, subject to constraints on the average signal-to-noise ratio requirement for sensing, the flight mission and collision avoidance of the UAVs, as well as the maximum transmit power at the GBS. Typically, this is a sequential decision-making problem with a given flight mission. Thus, we transform it into a specific Markov decision process (MDP) model called an episode task. Based on this modeling, we propose a novel LAE-oriented ISAC scheme, referred to as Deep LAE-ISAC (DeepLSC), by leveraging the deep reinforcement learning (DRL) technique. In DeepLSC, a reward function and a new action selection policy termed the constrained noise-exploration policy are judiciously designed to fulfill various constraints. To enable efficient learning in episode tasks, we develop a hierarchical experience replay mechanism, whose gist is to employ all experiences generated within each episode to jointly train the neural network. Besides, to enhance the convergence speed of DeepLSC, a symmetric experience augmentation mechanism, which simultaneously permutes the indices of all variables to enrich the available experience sets, is proposed. Simulation results demonstrate that, compared with benchmarks, DeepLSC yields a higher sum-rate while meeting the preset constraints, achieves faster convergence, and is more robust against different settings.
Submitted 1 January, 2025; v1 submitted 5 December, 2024; originally announced December 2024.
Comments: Submitted for an IEEE publication

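The symmetric experience augmentation idea, permuting interchangeable UAV indices so that one stored transition yields several equally valid ones, can be sketched directly; the flat per-UAV array layout is an assumed simplification of the paper's state and action representation:

```python
import numpy as np
from itertools import permutations

def augment(state, action, reward, next_state, n_uav):
    """state/action/next_state: arrays of shape (n_uav, feat); permuting the
    UAV rows produces transitions that are equally valid by symmetry."""
    out = []
    for perm in permutations(range(n_uav)):
        idx = list(perm)
        out.append((state[idx], action[idx], reward, next_state[idx]))
    return out

s = np.arange(6, dtype=float).reshape(3, 2)        # 3 UAVs, 2 features each
samples = augment(s, s + 10, 1.0, s + 1, n_uav=3)
print(len(samples))                                # 3! = 6 symmetric copies
```
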
arXiv:2412.03387 (https://arxiv.org/abs/2412.03387) [eess.SY, cs.RO]
Adaptive Model Predictive Control for Differential-Algebraic Systems towards a Higher Path Accuracy for Physically Coupled Robots
Authors: Xin Ye, Karl Handwerker, Sören Hohmann
Abstract: The physical coupling between robots has the potential to improve the capabilities of multi-robot systems in challenging manufacturing processes. However, the path tracking accuracy of physically coupled robots has not been studied adequately, especially considering the uncertain kinematic parameters, the mechanical elasticity, and the built-in controllers of off-the-shelf robots. This paper addresses these issues with a novel differential-algebraic system model which is verified against measurement data from real execution. The uncertain kinematic parameters are estimated online to adapt the model. Consequently, an adaptive model predictive controller is designed as a coordinator between the robots. The controller achieves a path tracking error reduction of 88.6% compared to the state-of-the-art benchmark in simulation.
Submitted 4 December 2024; originally announced December 2024.
arXiv:2412.03058 (https://arxiv.org/abs/2412.03058) [cs.CV]
Revisiting Energy-Based Model for Out-of-Distribution Detection
Authors: Yifan Wu, Xichen Ye, Songmin Dai, Dengye Pan, Xiaoqiang Li, Weizhong Zhang, Yifan Chen
Abstract: Out-of-distribution (OOD) detection is an essential approach to robustifying deep learning models, enabling them to identify inputs that fall outside their training distribution. Existing OOD detection methods usually depend on crafted data, such as specific outlier datasets or elaborate data augmentations. While this is reasonable, the frequent mismatch between crafted data and OOD data limits model robustness and generalizability. In response to this issue, we introduce Outlier Exposure by Simple Transformations (OEST), a framework that enhances OOD detection by leveraging "peripheral-distribution" (PD) data. Specifically, PD data are samples generated through simple data transformations, thus providing an efficient alternative to manually curated outliers. We adopt energy-based models (EBMs) to study PD data. We recognize the "energy barrier" in OOD detection, which characterizes the energy difference between in-distribution (ID) and OOD samples and eases detection. PD data are introduced to establish the energy barrier during training. Furthermore, this energy-barrier concept motivates a theoretically grounded energy-barrier loss to replace the classical energy-bounded loss, leading to an improved paradigm, OEST*, which achieves a more effective and theoretically sound separation between ID and OOD samples.
We perform empirical validation of our proposal, and extensive experiments across various benchmarks demonstrate that OEST* achieves better or similar accuracy compared with state-of-the-art methods.
Submitted 4 December 2024; originally announced December 2024.
Comments: This work has been submitted to the IEEE for possible publication
MSC Class: 68T05; 68T45. ACM Class: I.2.10; I.5.1
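For readers unfamiliar with the energy view: the standard energy score used throughout energy-based OOD detection is E(x) = -T * logsumexp(f(x)/T), and the "barrier" idea can be caricatured as a margin between mean ID and PD energies. The hinge-style penalty below is a stand-in sketch under that reading, not the paper's actual energy-barrier loss:

```python
# Standard energy score plus a hinge-style separation penalty as an
# illustrative stand-in for the paper's energy-barrier loss.
import torch
import torch.nn.functional as F

def energy_score(logits: torch.Tensor, T: float = 1.0) -> torch.Tensor:
    # E(x) = -T * logsumexp(f(x)/T); lower energy <=> more in-distribution.
    return -T * torch.logsumexp(logits / T, dim=-1)

def barrier_penalty(logits_id, logits_pd, margin: float = 1.0):
    # Encourage an energy gap ("barrier") of at least `margin` between
    # in-distribution samples and transformed peripheral-distribution samples.
    gap = energy_score(logits_pd).mean() - energy_score(logits_id).mean()
    return F.relu(margin - gap)

logits_id = torch.randn(16, 10) * 3   # confident-looking ID logits
logits_pd = torch.randn(16, 10)       # flatter PD logits
print(barrier_penalty(logits_id, logits_pd))
```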
arXiv:2412.02996 (https://arxiv.org/abs/2412.02996) [cs.CV, cs.HC, cs.IR]
CLAS: A Machine Learning Enhanced Framework for Exploring Large 3D Design Datasets
Authors: XiuYu Zhang, Xiaolei Ye, Jui-Che Chang, Yue Fang
Abstract: Three-dimensional (3D) objects have wide applications. Despite the growing interest in 3D modeling in academia and industry, designing and/or creating 3D objects from scratch remains time-consuming and challenging. With the development of generative artificial intelligence (AI), designers have discovered a new way to create images for ideation. However, generative AIs are less useful for creating 3D objects of satisfactory quality. To allow 3D designers to access a wide range of 3D objects for creative activities based on their specific demands, we propose a machine learning (ML) enhanced framework, CLAS, named after its four steps of capture, label, associate, and search, to enable fully automatic retrieval of 3D objects based on user specifications, leveraging existing datasets of 3D objects. CLAS provides an effective and efficient way for any person or organization to benefit from their existing but underutilized 3D datasets. In addition, CLAS may also be used to produce high-quality 3D object synthesis datasets for training and evaluating 3D generative models. As a proof of concept, we created and showcased a search system with a web user interface (UI) for retrieving 6,778 3D objects of chairs in the ShapeNet dataset, powered by CLAS. In a closed-set retrieval setting, our retrieval method achieves a mean reciprocal rank (MRR) of 0.58, top-1 accuracy of 42.27%, and top-10 accuracy of 89.64%.
Submitted 3 December 2024; originally announced December 2024.
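The quoted retrieval metrics are standard; for reference, this is how MRR and top-k accuracy fall out of the rank (1-indexed) of the first correct result per query:

```python
# Reference implementation of the standard retrieval metrics quoted above.
def mean_reciprocal_rank(ranks):
    return sum(1.0 / r for r in ranks) / len(ranks)

def top_k_accuracy(ranks, k):
    return sum(r <= k for r in ranks) / len(ranks)

ranks = [1, 3, 2, 50, 1]               # hypothetical first-hit ranks
print(mean_reciprocal_rank(ranks))     # ~0.57
print(top_k_accuracy(ranks, 10))       # 0.8
```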
arXiv:2412.02373 (https://arxiv.org/abs/2412.02373) [cs.CV]
Active Negative Loss: A Robust Framework for Learning with Noisy Labels
Authors: Xichen Ye, Yifan Wu, Yiwen Xu, Xiaoqiang Li, Weizhong Zhang, Yifan Chen
Abstract: Deep supervised learning has achieved remarkable success across a wide range of tasks, yet it remains susceptible to overfitting when confronted with noisy labels. To address this issue, noise-robust loss functions offer an effective solution for enhancing learning in the presence of label noise. In this work, we systematically investigate the limitations of the recently proposed Active Passive Loss (APL), which employs Mean Absolute Error (MAE) as its passive loss function. Despite the robustness brought by MAE, one of its key drawbacks is that it pays equal attention to clean and noisy samples; this slows down convergence and can make training difficult, particularly on large-scale datasets. To overcome these challenges, we introduce a novel class of loss functions, termed Normalized Negative Loss Functions (NNLFs), which serve as passive loss functions within the APL framework. NNLFs effectively address the limitations of MAE by concentrating more on memorized clean samples. By replacing MAE in APL with our proposed NNLFs, we enhance APL and present a new framework called Active Negative Loss (ANL). Moreover, in non-symmetric noise scenarios, we propose an entropy-based regularization technique to mitigate the vulnerability to label imbalance. Extensive experiments demonstrate that the new loss functions adopted by our ANL framework achieve better or comparable performance to state-of-the-art methods across various label noise types and in image segmentation tasks. The source code is available at: https://github.com/Virusdoll/Active-Negative-Loss.
Submitted 3 December 2024; originally announced December 2024.
Comments: This work has been submitted to the IEEE for possible publication
MSC Class: 68T05; 62H35. ACM Class: I.2.6; I.4.8
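As context for what ANL replaces, here is a compact sketch of the APL baseline named in the abstract: a normalized cross-entropy active term plus MAE as the passive term. The NNLFs that ANL substitutes for MAE are specified in the paper and repository and are not reproduced here.

```python
# Sketch of the Active Passive Loss (APL) baseline: normalized cross entropy
# (active) + MAE (passive). Weights alpha/beta are illustrative defaults.
import torch
import torch.nn.functional as F

def apl_loss(logits, target, alpha=1.0, beta=1.0):
    logp = F.log_softmax(logits, dim=-1)
    p = logp.exp()
    # Normalized cross entropy: CE(y) / sum over classes k of CE(k).
    nce = logp.gather(1, target[:, None]).squeeze(1) / logp.sum(dim=-1)
    # MAE between one-hot label and predicted probabilities: 2 * (1 - p_y).
    mae = 2.0 * (1.0 - p.gather(1, target[:, None]).squeeze(1))
    return (alpha * nce + beta * mae).mean()

logits = torch.randn(8, 10)
target = torch.randint(0, 10, (8,))
print(apl_loss(logits, target))
```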
arXiv:2412.01137 (https://arxiv.org/abs/2412.01137) [cs.CV]
TextSSR: Diffusion-based Data Synthesis for Scene Text Recognition
Authors: Xingsong Ye, Yongkun Du, Yunbo Tao, Zhineng Chen
Abstract: Scene text recognition (STR) suffers from the challenges of either less realistic synthetic training data or the difficulty of collecting sufficient high-quality real-world data, limiting the effectiveness of trained STR models. Meanwhile, despite producing holistically appealing text images, diffusion-based text image generation methods struggle to generate accurate and realistic instance-level text on a large scale. To tackle this, we introduce TextSSR, a novel framework for Synthesizing Scene Text Recognition data via a diffusion-based universal text region synthesis model. It ensures accuracy by focusing on generating text within a specified image region and leveraging rich glyph and position information, making the text region less complex than the entire image. Furthermore, we utilize neighboring text within the region as a prompt to capture real-world font styles and layout patterns, guiding the generated text to resemble actual scenes.
Finally, owing to its prompt-free nature and capability for character-level synthesis, TextSSR offers excellent scalability, and we construct an anagram-based TextSSR-F dataset of 0.4 million complex and realistic text instances. Experiments show that models trained with added TextSSR-F data exhibit better accuracy than models trained on 4 million existing synthetic instances. Moreover, their accuracy margin to models trained fully on a real-world dataset is less than 3.7%, confirming TextSSR's effectiveness and its great potential in scene text image synthesis. Our code is available at https://github.com/YesianRohn/TextSSR.
Submitted 2 December 2024; originally announced December 2024.

arXiv:2412.00447 (https://arxiv.org/abs/2412.00447) [cs.CV]
ATP-LLaVA: Adaptive Token Pruning for Large Vision Language Models
Authors: Xubing Ye, Yukang Gan, Yixiao Ge, Xiao-Ping Zhang, Yansong Tang
Abstract: Large Vision Language Models (LVLMs) have achieved significant success across multi-modal tasks. However, the computational cost of processing long visual tokens can be prohibitively expensive on resource-limited devices.
Previous methods have identified redundancy in visual tokens within the Large Language Model (LLM) decoder layers and have mitigated it by pruning tokens at a pre-defined or fixed ratio, thereby reducing computational overhead. Nonetheless, we observe that the impact of the pruning ratio varies across LLM layers and instances (image-prompt pairs). Therefore, it is essential to develop a layer-wise, instance-wise vision token pruning strategy to balance computational cost and model performance effectively. We propose ATP-LLaVA, a novel approach that adaptively determines instance-specific token pruning ratios for each LLM layer. Specifically, we introduce an Adaptive Token Pruning (ATP) module, which computes the importance score and pruning threshold adaptively based on the input instance. The ATP module can be seamlessly integrated between any two LLM layers with negligible computational overhead. Additionally, we develop a Spatial Augmented Pruning (SAP) strategy that prunes visual tokens from both token-redundancy and spatial-modeling perspectives. Our approach reduces the average token count by 75% while maintaining performance, with only a minimal 1.9% degradation across seven widely used benchmarks. The project page can be accessed via https://yxxxb.github.io/ATP-LLaVA-page/.
Submitted 30 November 2024; originally announced December 2024.
Comments: 11 pages, 4 figures
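The flavor of adaptive, instance-wise pruning can be conveyed with a toy scoring head and a learned threshold; the actual ATP module and SAP strategy are defined in the paper, so everything below is an illustrative assumption:

```python
# Toy sketch of instance-wise token pruning between two decoder layers.
# The scoring head and thresholding rule are assumptions, not ATP-LLaVA's.
import torch
import torch.nn as nn

class ToyTokenPruner(nn.Module):
    def __init__(self, dim: int):
        super().__init__()
        self.score = nn.Linear(dim, 1)    # per-token importance score
        self.thresh = nn.Linear(dim, 1)   # instance-level threshold from mean token

    def forward(self, tokens: torch.Tensor):
        """tokens: (B, N, D) visual tokens. Returns a boolean keep-mask (B, N),
        so the pruning ratio varies per instance (and per layer, if one pruner
        sits between each pair of layers)."""
        s = self.score(tokens).squeeze(-1)                 # (B, N)
        t = self.thresh(tokens.mean(dim=1)).squeeze(-1)    # (B,)
        return s > t[:, None]

pruner = ToyTokenPruner(dim=64)
keep = pruner(torch.randn(2, 576, 64))
print(keep.float().mean(dim=1))   # fraction of tokens kept per instance
```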
arXiv:2411.18290 (https://arxiv.org/abs/2411.18290) [eess.IV, cs.CV]
Leveraging Semantic Asymmetry for Precise Gross Tumor Volume Segmentation of Nasopharyngeal Carcinoma in Planning CT
Authors: Zi Li, Ying Chen, Zeli Chen, Yanzhou Su, Tai Ma, Tony C. W. Mok, Yan-Jie Zhou, Yunhai Bai, Zhinlin Zheng, Le Lu, Yirui Wang, Jia Ge, Xianghua Ye, Senxiang Yan, Dakai Jin
Abstract: In the radiation therapy of nasopharyngeal carcinoma (NPC), clinicians typically delineate the gross tumor volume (GTV) using non-contrast planning computed tomography to ensure accurate radiation dose delivery. However, the low contrast between tumors and adjacent normal tissues necessitates that radiation oncologists manually delineate the tumors, often relying on diagnostic MRI for guidance. In this study, we propose a novel approach to directly segment NPC gross tumors on non-contrast planning CT images, circumventing potential registration errors when aligning MRI or MRI-derived tumor masks to planning CT. To address the low contrast between tumors and adjacent normal structures in planning CT, we introduce a 3D Semantic Asymmetry Tumor segmentation (SATs) method. Specifically, we posit that a healthy nasopharyngeal region is characteristically bilaterally symmetric, whereas the emergence of nasopharyngeal carcinoma disrupts this symmetry. We then propose a Siamese contrastive learning segmentation framework that minimizes the voxel-wise distance between original and flipped areas without tumor and encourages a larger distance between original and flipped areas with tumor. Our approach thus enhances the sensitivity of features to semantic asymmetries. Extensive experiments demonstrate that the proposed SATs achieves the leading NPC GTV segmentation performance in both internal and external testing, e.g., with at least a 2% absolute Dice score improvement and a 12% average distance error reduction in the external testing when compared to other state-of-the-art methods.
Submitted 27 November 2024 (v1); last revised 18 December 2024; originally announced November 2024.
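A minimal sketch of the symmetry-contrast objective described in that abstract: features of the original and left-right-flipped volumes should agree on healthy (symmetric) voxels and disagree on tumor voxels. The margin form below is an assumption; the paper specifies the exact loss.

```python
# Sketch of a Siamese symmetry-contrast loss: pull original/flipped features
# together outside the tumor mask, push them apart inside it.
import torch
import torch.nn.functional as F

def symmetry_contrast_loss(feat, feat_flipped, tumor_mask, margin=1.0):
    """feat, feat_flipped: (B, C, D, H, W). In practice feat_flipped would be
    encoder(flip(volume)) flipped back into the original frame; tumor_mask:
    (B, 1, D, H, W) with 1 on tumor voxels."""
    dist = (feat - feat_flipped).pow(2).mean(dim=1, keepdim=True)  # voxel-wise distance
    pull = (dist * (1 - tumor_mask)).sum() / (1 - tumor_mask).sum().clamp(min=1)
    push = (F.relu(margin - dist) * tumor_mask).sum() / tumor_mask.sum().clamp(min=1)
    return pull + push

feat = torch.randn(1, 8, 4, 16, 16)
feat_flip = torch.flip(feat, dims=[-1])   # stand-in for the mirrored branch
mask = (torch.rand(1, 1, 4, 16, 16) > 0.9).float()
print(symmetry_contrast_loss(feat, feat_flip, mask))
```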
arXiv:2411.10639 (https://arxiv.org/abs/2411.10639) [cs.CV, cs.AI, cs.CL, cs.LG]
MTA: Multimodal Task Alignment for BEV Perception and Captioning
Authors: Yunsheng Ma, Burhaneddin Yaman, Xin Ye, Feng Tao, Abhirup Mallik, Ziran Wang, Liu Ren
Abstract: Bird's eye view (BEV)-based 3D perception plays a crucial role in autonomous driving applications. The rise of large language models has spurred interest in BEV-based captioning to understand object behavior in the surrounding environment. However, existing approaches treat perception and captioning as separate tasks, focusing on the performance of only one of the tasks and overlooking the potential benefits of multimodal alignment.
To bridge this gap between modalities, we introduce MTA, a novel multimodal task alignment framework that boosts both BEV perception and captioning. MTA consists of two key components: (1) BEV-Language Alignment (BLA), a contextual learning mechanism that aligns the BEV scene representations with ground-truth language representations, and (2) Detection-Captioning Alignment (DCA), a cross-modal prompting mechanism that aligns detection and captioning outputs. MTA integrates into state-of-the-art baselines during training, adding no extra computational complexity at runtime. Extensive experiments on the nuScenes and TOD3Cap datasets show that MTA significantly outperforms state-of-the-art baselines, achieving a 4.9% improvement in perception and a 9.2% improvement in captioning. These results underscore the effectiveness of unified alignment in reconciling BEV-based perception and captioning.
Submitted 15 November 2024; originally announced November 2024.
Comments: 10 pages
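The generic recipe behind such cross-modal alignment can be sketched as a symmetric InfoNCE loss that pulls each BEV scene embedding toward its ground-truth caption embedding and away from other captions in the batch. MTA's actual BLA and DCA mechanisms are specified in the paper; this is only the textbook version of the idea.

```python
# Generic symmetric InfoNCE alignment between scene and caption embeddings;
# a sketch of the alignment idea, not MTA's exact BLA/DCA formulation.
import torch
import torch.nn.functional as F

def alignment_loss(bev_emb, text_emb, temperature=0.07):
    """bev_emb, text_emb: (B, D); row i of each describes the same scene."""
    bev = F.normalize(bev_emb, dim=-1)
    txt = F.normalize(text_emb, dim=-1)
    logits = bev @ txt.t() / temperature       # (B, B) similarity matrix
    labels = torch.arange(bev.size(0))         # matching pairs on the diagonal
    return 0.5 * (F.cross_entropy(logits, labels) +
                  F.cross_entropy(logits.t(), labels))

print(alignment_loss(torch.randn(8, 256), torch.randn(8, 256)))
```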
arXiv:2411.05050 (https://arxiv.org/abs/2411.05050) [cs.CL, cs.AI]
Selecting Between BERT and GPT for Text Classification in Political Science Research
Authors: Yu Wang, Wen Qu, Xin Ye
Abstract: Political scientists often grapple with data scarcity in text classification. Recently, fine-tuned BERT models and their variants have gained traction as effective solutions to this issue. In this study, we investigate the potential of GPT-based models combined with prompt engineering as a viable alternative. We conduct a series of experiments across various classification tasks, differing in the number of classes and complexity, to evaluate the effectiveness of BERT-based versus GPT-based models in low-data scenarios. Our findings indicate that while zero-shot and few-shot learning with GPT models provide reasonable performance and are well-suited for early-stage research exploration, they generally fall short of, or at best match, the performance of BERT fine-tuning, particularly as the training set reaches a substantial size (e.g., 1,000 samples). We conclude by comparing these approaches in terms of performance, ease of use, and cost, providing practical guidance for researchers facing data limitations. Our results are particularly relevant for those engaged in quantitative text analysis in low-resource settings or with limited labeled data.
Submitted 7 November 2024; originally announced November 2024.
Comments: 28 pages, 5 figures, 7 tables
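For context, a minimal example of the kind of few-shot prompt such a GPT-based approach relies on; the paper's actual prompts, labels, and tasks differ, so this template and its label set are purely illustrative.

```python
# Hypothetical few-shot classification prompt for a 3-class political-text
# task; no model call is made here, only prompt construction.
def build_prompt(examples, text):
    lines = ["Classify each statement as LEFT, RIGHT, or NEUTRAL.", ""]
    for ex_text, ex_label in examples:            # few-shot demonstrations
        lines += [f"Statement: {ex_text}", f"Label: {ex_label}", ""]
    lines += [f"Statement: {text}", "Label:"]     # the query to classify
    return "\n".join(lines)

demos = [("Taxes on the wealthy should rise.", "LEFT"),
         ("Regulation is strangling small business.", "RIGHT")]
print(build_prompt(demos, "The committee will meet on Tuesday."))
```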
arXiv:2411.00734 (https://arxiv.org/abs/2411.00734) [cs.AR]
Multilayer Dataflow: Orchestrate Butterfly Sparsity to Accelerate Attention Computation
Authors: Haibin Wu, Wenming Li, Kai Yan, Zhihua Fan, Peiyang Wu, Yuqun Liu, Yanhuan Liu, Ziqing Qiang, Meng Wu, Kunming Liu, Xiaochun Ye, Dongrui Fan
Abstract: Recent neural networks (NNs) with self-attention exhibit competitiveness across different AI domains, but the essential attention mechanism brings massive computation and memory demands. To this end, various sparsity patterns have been introduced to reduce the quadratic computation complexity, among which structured butterfly sparsity has proven efficient at reducing computation while maintaining model accuracy. However, its complicated data-access pattern degrades utilization and makes parallelism hard to exploit on general block-oriented architectures such as GPUs.
Since reconfigurable dataflow architectures are known to have better data reusability and architectural flexibility for general NN-based acceleration, we apply them to butterfly sparsity to acquire better computational efficiency for attention workloads. We first propose a hybrid butterfly-sparsity network to obtain better trade-offs between attention accuracy and performance. Next, we propose a scalable multilayer dataflow method, supported by coarse-grained streaming parallelism designs, to orchestrate the butterfly sparsity computation on the dataflow array. The experiments show that, compared with the Jetson Xavier NX, our design achieves a speedup of up to $14.34\times$ ($9.29\times$ on average) as well as an $11.14\times$ energy efficiency advancement in attention workloads. In comparison with SOTA attention accelerators of the same peak performance, our dataflow architecture acquires $2.38\times$-$4.7\times$ efficiency improvement as well as $6.60\times$-$15.37\times$ energy reduction with butterfly sparsity optimization.
Submitted 1 November 2024 (v1); last revised 25 November 2024; originally announced November 2024.
Comments: 9 pages, 17 figures, ISCA 2025, 2024/11/23, Butterfly Sparsity Optimization Using Dataflow
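To make "butterfly sparsity" concrete: the classic butterfly (FFT-style) connectivity over a length-2^k sequence links position i to itself and to i XOR 2^l at each level l, giving O(N log N) nonzeros instead of N^2. The paper's exact attention pattern and its dataflow mapping differ; the sketch below only visualizes the connectivity family.

```python
# Build a classic butterfly connectivity mask to illustrate why the pattern
# is sparse yet irregular for block-oriented hardware.
import numpy as np

def butterfly_mask(n: int) -> np.ndarray:
    assert n & (n - 1) == 0, "n must be a power of two"
    mask = np.eye(n, dtype=bool)
    for level in range(n.bit_length() - 1):       # log2(n) levels
        for i in range(n):
            mask[i, i ^ (1 << level)] = True      # partner at distance 2^level
    return mask

m = butterfly_mask(16)
print(m.sum(), "nonzeros vs", 16 * 16, "dense")   # 80 vs 256
```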
arXiv:2410.21445 (https://arxiv.org/abs/2410.21445) [cs.RO]
TALE-teller: Tendon-Actuated Linked Element Robotic Testbed for Investigating Tail Functions
Authors: Margaret J. Zhang, Anvay A. Pradhan, Zachary Brei, Xiangyun Bu, Xiang Ye, Saima Jamal, Chae Woo Lim, Xiaonan Huang, Talia Y. Moore
Abstract: Tails serve various functions in both robotics and biology, including expression, grasping, and defense. The vertebrate tails associated with these functions exhibit diverse patterns of vertebral lengths, but the precise mechanisms linking form to function have not yet been established. Vertebrate tails are complex musculoskeletal structures, making both direct experimentation and computational modeling challenging. This paper presents Tendon-Actuated Linked-Element (TALE), a modular robotic test bed to explore how tail morphology influences function. By varying 3D-printed bones, silicone joints, and tendon configurations, TALE can match the morphology of extant, extinct, and even theoretical tails. We first characterized the stiffness of our joint design empirically and in simulation before testing the hypothesis that tails with different vertebral proportions curve differently. We then compared the maximum bending state of two common vertebrate proportions and one theoretical morphology. Uniform bending of joints with different vertebral proportions led to substantial differences in the location of the tail tip, suggesting a significant influence on overall tail function. Future studies can introduce more complex morphologies to establish the mechanisms of diverse tail functions. With this foundational knowledge, we will isolate the key features underlying tail function to inform the design of robotic tails. Images and videos can be found on TALE's project page: https://www.embirlab.com/tale.
Submitted 28 October 2024; originally announced October 2024.
Comments: 8 pages, 5 figures
arXiv:2410.21111 (https://arxiv.org/abs/2410.21111) [cs.CV, cs.LG, math.NA]
LAMA: Stable Dual-Domain Deep Reconstruction For Sparse-View CT
Authors: Chi Ding, Qingchao Zhang, Ge Wang, Xiaojing Ye, Yunmei Chen
Abstract: Inverse problems arise in many applications, especially tomographic imaging. We develop a Learned Alternating Minimization Algorithm (LAMA) to solve such problems via two-block optimization by synergizing data-driven and classical techniques with proven convergence. LAMA is naturally induced by a variational model with learnable regularizers in both the data and image domains, parameterized as composite functions of neural networks trained with domain-specific data. We allow these regularizers to be nonconvex and nonsmooth to extract features from data effectively. We minimize the overall objective function using Nesterov's smoothing technique and a residual learning architecture. It is demonstrated that LAMA reduces network complexity, improves memory efficiency, and enhances reconstruction accuracy, stability, and interpretability. Extensive experiments show that LAMA significantly outperforms state-of-the-art methods on popular benchmark datasets for computed tomography.
Submitted 28 October 2024; originally announced October 2024.
Comments: Journal version for LAMA (Learned Alternating Minimization Algorithm)
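The two-block structure can be caricatured as alternating gradient steps on an image-domain variable and a data-domain variable, each with its own regularizer. The toy below uses fixed Tikhonov terms in place of LAMA's learned, nonsmooth regularizers, and omits the Nesterov smoothing and convergence safeguards that are the paper's contribution.

```python
# Schematic two-block alternating minimization with toy regularizers:
# min_{x,z} 0.5||Ax - z||^2 + 0.5||z - y||^2 + lam*(||x||^2 + ||z||^2).
import torch

def alternating_minimization(A, y, steps=50, lr=0.1, lam=1e-2):
    x = torch.zeros(A.shape[1])     # image-domain variable
    z = y.clone()                   # data-domain variable
    for _ in range(steps):
        # x-block: gradient step on 0.5||Ax - z||^2 + lam||x||^2
        grad_x = A.t() @ (A @ x - z) + 2 * lam * x
        x = x - lr * grad_x
        # z-block: gradient step on 0.5||Ax - z||^2 + 0.5||z - y||^2 + lam||z||^2
        grad_z = (z - A @ x) + (z - y) + 2 * lam * z
        z = z - lr * grad_z
    return x

A = torch.randn(30, 20) / 5         # stand-in for a CT forward operator
y = torch.randn(30)                 # stand-in for measured sinogram data
print(alternating_minimization(A, y).shape)
```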
arXiv:2410.18368 (https://arxiv.org/abs/2410.18368) [cs.LG, cs.AR]
Multi-objective Optimization in CPU Design Space Exploration: Attention is All You Need
Authors: Runzhen Xue, Hao Wu, Mingyu Yan, Ziheng Xiao, Xiaochun Ye, Dongrui Fan
Abstract: Design space exploration (DSE) enables architects to systematically evaluate various design options, guiding decisions on the most suitable configurations to meet specific objectives such as optimizing performance, power, and area. However, the growing complexity of modern CPUs has dramatically increased the number of micro-architectural parameters and expanded the overall design space, making DSE more challenging and time-consuming. Existing DSE frameworks struggle in large-scale design spaces due to inaccurate models and limited insight into parameter impact, hindering efficient identification of optimal micro-architectures within tight timeframes.
In this work, we introduce AttentionDSE. Its key idea is to use the attention mechanism to establish a direct mapping from micro-architectural parameters to their contributions to predicted performance. This approach enhances both the prediction accuracy and the interpretability of the performance model. Furthermore, the attention weights are dynamically adjusted, enabling the model to respond to design changes and effectively pinpoint the key micro-architectural parameters and components responsible for performance bottlenecks. Thus, AttentionDSE accurately, purposefully, and rapidly discovers optimal designs. Experiments on SPEC 2017 demonstrate that AttentionDSE reduces exploration time by over 80% and achieves a 3.9% improvement in Pareto hypervolume compared to state-of-the-art DSE frameworks, while maintaining superior prediction accuracy and efficiency as the number of parameters grows.
Submitted 23 October 2024; originally announced October 2024.
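The core idea of treating each micro-architectural parameter as a token for a self-attention predictor can be sketched as follows; embedding sizes, head counts, and the pooling choice are arbitrary here, and AttentionDSE's actual architecture is described in the paper.

```python
# Toy attention-based performance predictor over parameter tokens.
import torch
import torch.nn as nn

class ToyPerfPredictor(nn.Module):
    def __init__(self, n_params: int, dim: int = 32):
        super().__init__()
        self.value_proj = nn.Linear(1, dim)            # embed each parameter value
        self.param_emb = nn.Embedding(n_params, dim)   # identify which parameter it is
        layer = nn.TransformerEncoderLayer(d_model=dim, nhead=4, batch_first=True)
        self.encoder = nn.TransformerEncoder(layer, num_layers=2)
        self.head = nn.Linear(dim, 1)                  # predicted performance

    def forward(self, x: torch.Tensor):
        """x: (B, n_params) normalized micro-architectural parameter values."""
        ids = torch.arange(x.size(1), device=x.device)
        tok = self.value_proj(x.unsqueeze(-1)) + self.param_emb(ids)
        return self.head(self.encoder(tok).mean(dim=1)).squeeze(-1)

model = ToyPerfPredictor(n_params=24)
print(model(torch.rand(4, 24)).shape)    # torch.Size([4])
```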

arXiv:2410.16235 [pdf, other] | cs.CL (Computation and Language)
Title: ToW: Thoughts of Words Improve Reasoning in Large Language Models
Authors: Zhikun Xu, Ming Shen, Jacob Dineen, Zhaonan Li, Xiao Ye, Shijie Lu, Aswin RRV, Chitta Baral, Ben Zhou
Abstract: We introduce thoughts of words (ToW), a novel training-time data-augmentation method for next-word prediction. ToW views next-word prediction as a core reasoning task and injects fine-grained thoughts explaining what the next word should be and how it relates to the previous context in pre-training texts. Our formulation addresses two fundamental drawbacks of existing next-word prediction learning schemes: they induce factual hallucination and are inefficient for models to learn the implicit reasoning processes in raw texts. While there are many ways to acquire such thoughts of words, we explore the first step of acquiring ToW annotations through distilling from larger models. After continual pre-training with only 70K ToW annotations, we effectively improve models' reasoning performance by 7% to 9% on average and reduce model hallucination by up to 10%. At the same time, ToW is entirely agnostic to tasks and applications, introducing no additional biases on labels or semantics.
Submitted 29 January, 2025; v1 submitted 21 October, 2024; originally announced October 2024.
Comments: Accepted by NAACL 2025 Main Conference
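
To make the recipe concrete, a toy sketch of what ToW-style augmentation could look like (the tag format and the teacher stub are hypothetical, not taken from the paper's code):

    def teacher_thought(context: str, next_word: str) -> str:
        # Stand-in for distillation from a larger model.
        return f"the text so far implies '{next_word}' comes next"

    def augment_with_tow(words: list, positions: set) -> str:
        out = []
        for i, w in enumerate(words):
            if i in positions:
                thought = teacher_thought(" ".join(words[:i]), w)
                out.append(f"<ToW>{thought}</ToW>")
            out.append(w)
        return " ".join(out)

    print(augment_with_tow("the capital of France is Paris".split(), {5}))

The augmented text, thoughts interleaved before selected next words, is then used for ordinary continual pre-training; no change to the training objective is required.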
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by NAACL 2025 Main Conference</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.07819">arXiv:2410.07819</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.07819">pdf</a>, <a href="https://arxiv.org/format/2410.07819">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Uncovering Overfitting in Large Language Model Editing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+M">Mengqi Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Ye%2C+X">Xiaotian Ye</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Q">Qiang Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Ren%2C+P">Pengjie Ren</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+S">Shu Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Z">Zhumin Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.07819v1-abstract-short" style="display: inline;"> Knowledge editing has been proposed as an effective method for updating and correcting the internal knowledge of Large Language Models (LLMs). However, existing editing methods often struggle with complex tasks, such as multi-hop reasoning. In this paper, we identify and investigate the phenomenon of Editing Overfit, where edited models assign disproportionately high probabilities to the edit targ&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.07819v1-abstract-full').style.display = 'inline'; document.getElementById('2410.07819v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.07819v1-abstract-full" style="display: none;"> Knowledge editing has been proposed as an effective method for updating and correcting the internal knowledge of Large Language Models (LLMs). However, existing editing methods often struggle with complex tasks, such as multi-hop reasoning. In this paper, we identify and investigate the phenomenon of Editing Overfit, where edited models assign disproportionately high probabilities to the edit target, hindering the generalization of new knowledge in complex scenarios. We attribute this issue to the current editing paradigm, which places excessive emphasis on the direct correspondence between the input prompt and the edit target for each edit sample. To further explore this issue, we introduce a new benchmark, EVOKE (EValuation of Editing Overfit in Knowledge Editing), along with fine-grained evaluation metrics. Through comprehensive experiments and analysis, we demonstrate that Editing Overfit is prevalent in current editing methods and that common overfitting mitigation strategies are of limited effectiveness in knowledge editing. 
To overcome this, inspired by LLMs&#39; knowledge recall mechanisms, we propose a new plug-and-play strategy called Learn to Inference (LTI), which introduce a Multi-stage Inference Constraint module to guide the edited models in recalling new knowledge similarly to how unedited LLMs leverage knowledge through in-context learning. Extensive experimental results across a wide range of tasks validate the effectiveness of LTI in mitigating Editing Overfit. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.07819v1-abstract-full').style.display = 'none'; document.getElementById('2410.07819v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.04689">arXiv:2410.04689</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.04689">pdf</a>, <a href="https://arxiv.org/format/2410.04689">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Low-Rank Continual Pyramid Vision Transformer: Incrementally Segment Whole-Body Organs in CT with Light-Weighted Adaptation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+V">Vince Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Ji%2C+Z">Zhanghexuan Ji</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+D">Dazhou Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+P">Puyang Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Xia%2C+Y">Yingda Xia</a>, <a href="/search/cs?searchtype=author&amp;query=Lu%2C+L">Le Lu</a>, <a href="/search/cs?searchtype=author&amp;query=Ye%2C+X">Xianghua Ye</a>, <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+W">Wei Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Jin%2C+D">Dakai Jin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.04689v1-abstract-short" style="display: inline;"> Deep segmentation networks achieve high performance when trained on specific datasets. However, in clinical practice, it is often desirable that pretrained segmentation models can be dynamically extended to enable segmenting new organs without access to previous training datasets or without training from scratch. This would ensure a much more efficient model development and deployment paradigm acc&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04689v1-abstract-full').style.display = 'inline'; document.getElementById('2410.04689v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.04689v1-abstract-full" style="display: none;"> Deep segmentation networks achieve high performance when trained on specific datasets. 
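
The symptom itself is easy to state in code; a hedged sketch with a toy model interface (nothing below is from the EVOKE benchmark, it only illustrates the measurement):

    # An over-edited model keeps emitting the edit target ("Canberra") even for
    # a multi-hop question whose answer should be something else ("English").
    def prob_of(model, prompt, answer):
        # model(prompt) returns a {word: probability} distribution (toy interface)
        return model(prompt).get(answer, 0.0)

    multihop_q = "What language is spoken in the capital of Australia?"
    edited = lambda prompt: {"Canberra": 0.90, "English": 0.05}  # over-edited stand-in
    gap = prob_of(edited, multihop_q, "Canberra") - prob_of(edited, multihop_q, "English")
    print(f"overfit gap: {gap:+.2f}")  # large positive gap => Editing Overfit symptom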

arXiv:2410.04689 [pdf, other] | cs.CV (Computer Vision and Pattern Recognition)
Title: Low-Rank Continual Pyramid Vision Transformer: Incrementally Segment Whole-Body Organs in CT with Light-Weighted Adaptation
Authors: Vince Zhu, Zhanghexuan Ji, Dazhou Guo, Puyang Wang, Yingda Xia, Le Lu, Xianghua Ye, Wei Zhu, Dakai Jin
Abstract: Deep segmentation networks achieve high performance when trained on specific datasets. However, in clinical practice, it is often desirable that pretrained segmentation models can be dynamically extended to segment new organs without access to previous training datasets and without training from scratch, ensuring a much more efficient model development and deployment paradigm that accounts for patient privacy and data storage constraints. This clinically preferred process can be viewed as a continual semantic segmentation (CSS) problem. Previous CSS works either experience catastrophic forgetting or incur unaffordable memory costs as models expand. In this work, we propose a new continual whole-body organ segmentation model with light-weighted low-rank adaptation (LoRA). We first train and freeze a pyramid vision transformer (PVT) base segmentation model on the initial task, then continually add light-weighted trainable LoRA parameters to the frozen model for each new learning task. Through a holistic exploration of architecture modifications, we identify the three layers (patch-embedding, multi-head attention, and feed-forward) most critical for adapting to new segmentation tasks, while keeping the majority of the pretrained parameters fixed. Our model continually segments new organs without catastrophic forgetting while maintaining a low parameter growth rate. Continually trained and tested on four datasets covering different body parts and a total of 121 organs, our model achieves high segmentation accuracy, closely approaching the PVT and nnUNet upper bounds, and significantly outperforms other regularization-based CSS methods. Compared with the leading architecture-based CSS method, our model achieves comparable performance with a substantially lower parameter growth rate.
Submitted 6 October, 2024; originally announced October 2024.
Comments: Accepted by Medical Image Computing and Computer Assisted Intervention (MICCAI) 2024

arXiv:2410.00356 [pdf, other] | cs.RO (Robotics), cs.ET (Emerging Technologies), eess.SY (Systems and Control)
Title: A Digital Twin Framework for Physical-Virtual Integration in V2X-Enabled Connected Vehicle Corridors
Authors: Keshu Wu, Pei Li, Yang Cheng, Steven T. Parker, Bin Ran, David A. Noyce, Xinyue Ye
Abstract: Transportation Cyber-Physical Systems (T-CPS) are critical in improving traffic safety, reliability, and sustainability by integrating computing, communication, and control in transportation systems. The connected vehicle corridor is at the forefront of this transformation, where Cellular Vehicle-to-Everything (C-V2X) technology facilitates real-time data exchange between infrastructure, vehicles, and road users. However, challenges remain in processing and synchronizing the vast V2X data from vehicles and roadside units, particularly when ensuring scalability, data integrity, and operational resilience. This paper presents a digital twin framework for T-CPS, developed from a real-world connected vehicle corridor, to address these challenges. By leveraging C-V2X technology and real-time data from infrastructure, vehicles, and road users, the digital twin accurately replicates vehicle behaviors, signal phases, and traffic patterns within the CARLA simulation environment. This framework demonstrates high fidelity between physical and digital systems and ensures robust synchronization of vehicle trajectories and signal phases through extensive experiments. Moreover, the digital twin's scalable and redundant architecture enhances data integrity, making it capable of supporting future large-scale C-V2X deployments. The digital twin is a vital tool in T-CPS, enabling real-time traffic monitoring, prediction, and optimization to enhance the reliability and safety of transportation systems.
Submitted 30 September, 2024; originally announced October 2024.
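
A hedged sketch of the physical-to-virtual synchronization loop such a twin needs (this assumes a running CARLA server and a hypothetical V2X message schema; it is the shape of the idea, not the paper's framework):

    import carla  # CARLA Python API; assumes a simulator server on localhost:2000

    client = carla.Client("localhost", 2000)
    world = client.get_world()
    blueprint = world.get_blueprint_library().find("vehicle.tesla.model3")
    twins = {}  # real-world vehicle id -> simulated twin actor

    def sync_vehicle(msg):
        # Mirror one V2X status message into the simulation. The msg fields
        # (id, x, y, heading_deg) are an assumed schema for illustration.
        tf = carla.Transform(
            carla.Location(x=msg["x"], y=msg["y"], z=0.5),
            carla.Rotation(yaw=msg["heading_deg"]),
        )
        if msg["id"] not in twins:
            twins[msg["id"]] = world.spawn_actor(blueprint, tf)
        else:
            twins[msg["id"]].set_transform(tf)

Driving this function from the corridor's live message stream keeps the simulated state tracking the physical one; everything beyond that (signal phases, redundancy) layers on the same loop.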

arXiv:2409.20558 [pdf, other] | cs.CV (Computer Vision and Pattern Recognition)
Title: Uni$^2$Det: Unified and Universal Framework for Prompt-Guided Multi-dataset 3D Detection
Authors: Yubin Wang, Zhikang Zou, Xiaoqing Ye, Xiao Tan, Errui Ding, Cairong Zhao
Abstract: We present Uni$^2$Det, a new framework for unified and universal multi-dataset training on 3D detection, enabling robust performance across diverse domains and generalization to unseen domains. Due to substantial disparities in data distribution and variations in taxonomy across diverse domains, training such a detector by simply merging datasets poses a significant challenge. Motivated by this observation, we introduce multi-stage prompting modules for multi-dataset 3D detection, which leverage prompts based on the characteristics of the corresponding datasets to mitigate existing differences. This elegant design facilitates seamless plug-and-play integration within various advanced 3D detection frameworks in a unified manner, while also allowing straightforward adaptation for universal applicability across datasets. Experiments are conducted across multiple dataset consolidation scenarios involving KITTI, Waymo, and nuScenes, demonstrating that our Uni$^2$Det outperforms existing methods by a large margin in multi-dataset training. Notably, results on zero-shot cross-dataset transfer validate the generalization capability of our proposed method.
Submitted 30 September, 2024; originally announced September 2024.
Comments: 13 pages, 5 figures, 6 tables
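
One plausible reading of "multi-stage prompting" in code (a toy numpy sketch under my assumptions, not the released implementation): each source dataset owns a small learned prompt that is injected into the shared features at every stage.

    import numpy as np

    rng = np.random.default_rng(0)
    d = 32
    # One small learned prompt per source dataset (random stand-ins here).
    prompts = {name: rng.normal(size=d) * 0.01 for name in ("kitti", "waymo", "nuscenes")}

    def stage(features, dataset):
        # Inject the dataset-specific prompt, then run the shared detector
        # block (np.tanh stands in for that block).
        return np.tanh(features + prompts[dataset])

    x = rng.normal(size=d)
    for _ in range(3):          # multi-stage: inject at every stage
        x = stage(x, "waymo")

The shared weights stay dataset-agnostic; only the tiny prompt vectors absorb the distribution and taxonomy gaps, which is what makes the design plug-and-play.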

arXiv:2409.15730 [pdf, other] | cs.RO (Robotics), cs.AI (Artificial Intelligence)
Title: Learning Multiple Probabilistic Decisions from Latent World Model in Autonomous Driving
Authors: Lingyu Xiao, Jiang-Jiang Liu, Sen Yang, Xiaofan Li, Xiaoqing Ye, Wankou Yang, Jingdong Wang
Abstract: The autoregressive world model exhibits robust generalization capabilities in vectorized scene understanding but encounters difficulties in deriving actions due to insufficient uncertainty modeling and self-delusion. In this paper, we explore the feasibility of deriving decisions from an autoregressive world model by addressing these challenges through the formulation of multiple probabilistic hypotheses. We propose LatentDriver, a framework that models the environment's next states and the ego vehicle's possible actions as a mixture distribution, from which a deterministic control signal is then derived. By incorporating mixture modeling, the stochastic nature of decision-making is captured. Additionally, the self-delusion problem is mitigated by providing intermediate actions sampled from a distribution to the world model. Experimental results on the recently released closed-loop benchmark Waymax demonstrate that LatentDriver surpasses state-of-the-art reinforcement learning and imitation learning methods, achieving expert-level performance. The code and models will be made available at https://github.com/Sephirex-X/LatentDriver.
Submitted 24 September, 2024; originally announced September 2024.
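
The "mixture distribution to deterministic control" step can be illustrated with a generic mixture readout (illustrative only; LatentDriver's exact parameterization may differ):

    import numpy as np

    K, action_dim = 3, 2                         # e.g. (steering, acceleration)
    logits = np.array([0.2, 1.5, -0.3])          # mixture weights (pre-softmax)
    means = np.array([[0.1, 0.8],                # per-hypothesis action means
                      [0.0, 0.3],
                      [-0.4, 0.1]])
    weights = np.exp(logits) / np.exp(logits).sum()

    deterministic_action = means[weights.argmax()]   # mode-seeking readout
    expected_action = weights @ means                # alternative: expectation
    print(deterministic_action, expected_action)

Keeping several weighted hypotheses preserves the stochasticity of decision-making during training, while the argmax (or expectation) yields a single control signal at execution time.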

arXiv:2409.12183 [pdf, other] | cs.CL (Computation and Language), cs.AI (Artificial Intelligence), cs.LG (Machine Learning)
Title: To CoT or not to CoT? Chain-of-thought helps mainly on math and symbolic reasoning
Authors: Zayne Sprague, Fangcong Yin, Juan Diego Rodriguez, Dongwei Jiang, Manya Wadhwa, Prasann Singhal, Xinyu Zhao, Xi Ye, Kyle Mahowald, Greg Durrett
Abstract: Chain-of-thought (CoT) via prompting is the de facto method for eliciting reasoning capabilities from large language models (LLMs). But for what kinds of tasks is this extra "thinking" really helpful? To analyze this, we conducted a quantitative meta-analysis covering over 100 papers using CoT and ran our own evaluations of 20 datasets across 14 models. Our results show that CoT gives strong performance benefits primarily on tasks involving math or logic, with much smaller gains on other types of tasks. On MMLU, directly generating the answer without CoT leads to almost identical accuracy as CoT unless the question or model's response contains an equals sign, indicating symbolic operations and reasoning. Following this finding, we analyze the behavior of CoT on these problems by separating planning and execution and comparing against tool-augmented LLMs. Much of CoT's gain comes from improving symbolic execution, but it underperforms relative to using a symbolic solver. Our results indicate that CoT can be applied selectively, maintaining performance while saving inference costs. Furthermore, they suggest a need to move beyond prompt-based CoT to new paradigms that better leverage intermediate computation across the whole range of LLM applications.
Submitted 28 October, 2024; v1 submitted 18 September, 2024; originally announced September 2024.
Comments: Swapped column names for Tables 7 and 8 in the appendix. Fixed the prompt for SocialIQA; results in figures and tables are updated (no major differences, but the prompt is now correct)

arXiv:2409.11676 [pdf, other] | cs.RO (Robotics), cs.AI (Artificial Intelligence), cs.LG (Machine Learning), cs.MA (Multiagent Systems)
Title: Hypergraph-based Motion Generation with Multi-modal Interaction Relational Reasoning
Authors: Keshu Wu, Yang Zhou, Haotian Shi, Dominique Lord, Bin Ran, Xinyue Ye
Abstract: The intricate nature of real-world driving environments, characterized by dynamic and diverse interactions among multiple vehicles and their possible future states, presents considerable challenges in accurately predicting the motion states of vehicles and handling the uncertainty inherent in those predictions. Addressing these challenges requires comprehensive modeling and reasoning to capture the implicit relations among vehicles and the corresponding diverse behaviors. This research introduces an integrated framework for autonomous vehicle (AV) motion prediction that addresses these complexities, utilizing a novel Relational Hypergraph Interaction-informed Neural mOtion generator (RHINO). RHINO leverages hypergraph-based relational reasoning by integrating a multi-scale hypergraph neural network to model group-wise interactions among multiple vehicles and their multi-modal driving behaviors, thereby enhancing motion prediction accuracy and reliability. Experimental validation using real-world datasets demonstrates the superior performance of this framework in improving predictive accuracy and fostering socially aware automated driving in dynamic traffic scenarios.
Submitted 17 September, 2024; originally announced September 2024.
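
Group-wise interaction via a hypergraph reduces to a two-step aggregation, vehicles to hyperedges and back; a generic numpy sketch (not RHINO itself, whose network is multi-scale and learned):

    import numpy as np

    rng = np.random.default_rng(0)
    n_vehicles, n_edges, d = 5, 2, 8
    # Incidence matrix: H[i, j] = 1 if vehicle i belongs to interaction group j
    # (e.g. a merging cluster), so a hyperedge can connect more than two vehicles.
    H = np.array([[1, 0],
                  [1, 1],
                  [1, 0],
                  [0, 1],
                  [0, 1]], dtype=float)
    X = rng.normal(size=(n_vehicles, d))     # per-vehicle motion features

    edge_msg = (H.T @ X) / H.sum(axis=0, keepdims=True).T    # mean over group members
    X_new = X + H @ edge_msg / H.sum(axis=1, keepdims=True)  # gather group context

The hyperedge step is what an ordinary pairwise graph cannot express: each message summarizes a whole group at once rather than one neighbor at a time.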

arXiv:2409.09605 [pdf, other] | cs.CV (Computer Vision and Pattern Recognition)
Title: DreamMover: Leveraging the Prior of Diffusion Models for Image Interpolation with Large Motion
Authors: Liao Shen, Tianqi Liu, Huiqiang Sun, Xinyi Ye, Baopu Li, Jianming Zhang, Zhiguo Cao
Abstract: We study the problem of generating intermediate images from image pairs with large motion while maintaining semantic consistency. Due to the large motion, the intermediate semantic information may be absent from the input images. Existing methods are either limited to small motion or focus on topologically similar objects, leading to artifacts and inconsistency in the interpolation results. To overcome this challenge, we delve into pre-trained image diffusion models for their capabilities in semantic cognition and representation, ensuring consistent expression of the absent intermediate semantic representations with the input. To this end, we propose DreamMover, a novel image interpolation framework with three main components: 1) a natural flow estimator based on the diffusion model that can implicitly reason about the semantic correspondence between two images; 2) to avoid the loss of detailed information during fusion, a key insight to fuse information in two parts, high-level space and low-level space; 3) a self-attention concatenation and replacement approach to enhance the consistency between the generated images and the input. Lastly, we present a challenging benchmark dataset, InterpBench, to evaluate the semantic consistency of generated results. Extensive experiments demonstrate the effectiveness of our method. Our project is available at https://dreamm0ver.github.io.
Submitted 18 September, 2024; v1 submitted 15 September, 2024; originally announced September 2024.
Comments: ECCV 2024
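
A very loose sketch of the "fuse in two parts" intuition (my simplification; the paper operates on diffusion latents and attention maps, not raw arrays):

    import numpy as np

    def fuse(z0_high, z1_high, z0_low, z1_low, t):
        high = (1 - t) * z0_high + t * z1_high   # high-level: soft semantic blend
        low = z0_low if t < 0.5 else z1_low      # low-level: hard pick preserves detail
        return high, low

    z = [np.ones(4), np.zeros(4), np.full(4, 9.0), np.full(4, 7.0)]
    print(fuse(*z, t=0.3))

Blending everything would wash out texture; splitting the fusion lets semantics interpolate smoothly while fine detail is carried over intact.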

arXiv:2409.00633 [pdf, other] | cs.CV (Computer Vision and Pattern Recognition)
Title: Make Your ViT-based Multi-view 3D Detectors Faster via Token Compression
Authors: Dingyuan Zhang, Dingkang Liang, Zichang Tan, Xiaoqing Ye, Cheng Zhang, Jingdong Wang, Xiang Bai
Abstract: Slow inference speed is one of the most crucial concerns for deploying multi-view 3D detectors in tasks with high real-time requirements, like autonomous driving. Although many sparse query-based methods have already attempted to improve the efficiency of 3D detectors, they neglect the backbone, especially when using Vision Transformers (ViT) for better performance. To tackle this problem, we explore efficient ViT backbones for multi-view 3D detection via token compression and propose a simple yet effective method called TokenCompression3D (ToC3D). By leveraging history object queries as high-quality foreground priors, modeling 3D motion information in them, and interacting them with image tokens through the attention mechanism, ToC3D can effectively estimate the information density of image tokens and segment the salient foreground tokens. With the introduced dynamic router design, ToC3D allocates more computing resources to important foreground tokens while compressing the information loss, leading to a more efficient ViT-based multi-view 3D detector. Extensive results on the large-scale nuScenes dataset show that our method can nearly maintain the performance of recent SOTA with up to 30% inference speedup, and the improvements are consistent after scaling up the ViT and input resolution. The code will be made available at https://github.com/DYZhang09/ToC3D.
Submitted 1 September, 2024; originally announced September 2024.
Comments: Accepted by ECCV 2024
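
Query-guided token routing in the spirit of the abstract (a generic top-k sketch, not ToC3D's code):

    import numpy as np

    def softmax(x, axis=-1):
        e = np.exp(x - x.max(axis=axis, keepdims=True))
        return e / e.sum(axis=axis, keepdims=True)

    rng = np.random.default_rng(0)
    n_tokens, n_queries, d, k = 64, 8, 32, 16
    tokens = rng.normal(size=(n_tokens, d))
    queries = rng.normal(size=(n_queries, d))   # history object queries as priors

    scores = softmax(queries @ tokens.T).max(axis=0)   # salience per image token
    keep = np.argsort(scores)[-k:]                      # foreground: full ViT path
    drop = np.setdiff1d(np.arange(n_tokens), keep)      # background: cheap path

The speedup comes from the routing: only the k salient tokens pay for full attention in later blocks, while the rest take a compressed path.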

arXiv:2409.00494 [pdf, other] | cs.AI (Artificial Intelligence), cs.SE (Software Engineering)
Title: GenAI-powered Multi-Agent Paradigm for Smart Urban Mobility: Opportunities and Challenges for Integrating Large Language Models (LLMs) and Retrieval-Augmented Generation (RAG) with Intelligent Transportation Systems
Authors: Haowen Xu, Jinghui Yuan, Anye Zhou, Guanhao Xu, Wan Li, Xuegang Ban, Xinyue Ye
Abstract: Leveraging recent advances in generative AI, multi-agent systems are increasingly being developed to enhance the functionality and efficiency of smart city applications. This paper explores the transformative potential of large language models (LLMs) and emerging Retrieval-Augmented Generation (RAG) technologies in Intelligent Transportation Systems (ITS), paving the way for innovative solutions to critical challenges in urban mobility. We begin with a comprehensive overview of the current state of the art in mobility data, ITS, and Connected Vehicle (CV) applications. Building on this review, we discuss the rationale behind RAG and examine the opportunities for integrating these Generative AI (GenAI) technologies into the smart mobility sector. We propose a conceptual framework for developing multi-agent systems capable of intelligently and conversationally delivering smart mobility services to urban commuters, transportation operators, and decision-makers. Our approach seeks to foster an autonomous and intelligent paradigm that (a) promotes science-based advisories to reduce traffic congestion, accidents, and carbon emissions at multiple scales; (b) facilitates public education and engagement in participatory mobility management; and (c) automates specialized transportation management tasks and the development of critical ITS platforms, such as data analytics and interpretation, knowledge representation, and traffic simulations. By integrating LLMs and RAG, our approach seeks to overcome the limitations of traditional rule-based multi-agent systems, which rely on fixed knowledge bases and limited reasoning capabilities. This integration paves the way for a more scalable, intuitive, and automated multi-agent paradigm, driving advancements in ITS and urban mobility.
Submitted 4 September, 2024; v1 submitted 31 August, 2024; originally announced September 2024.
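
The RAG loop underneath such agents is simple to sketch (toy hash-based embeddings stand in for a real embedding model and vector store; the documents are invented examples):

    import numpy as np

    def embed(text: str, dim: int = 64) -> np.ndarray:
        # Deterministic toy embedding; a real system would call an embedding model.
        rng = np.random.default_rng(abs(hash(text)) % (2**32))
        v = rng.normal(size=dim)
        return v / np.linalg.norm(v)

    docs = ["signal timing plan for corridor 5",
            "incident report: lane closure on I-35",
            "transit headway schedule"]
    doc_vecs = np.stack([embed(d) for d in docs])

    query = "why is traffic congested on I-35?"
    scores = doc_vecs @ embed(query)                 # cosine similarity (unit vectors)
    context = docs[int(scores.argmax())]
    prompt = f"Context: {context}\nQuestion: {query}\nAnswer:"   # handed to the LLM

Grounding the LLM's answer in retrieved, up-to-date corridor data is exactly what lets such agents move past the fixed knowledge bases of rule-based systems.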

arXiv:2408.17325 [pdf, other] | cs.CL (Computation and Language), cond-mat.dis-nn (Disordered Systems and Neural Networks), cond-mat.stat-mech (Statistical Mechanics)
Title: Impact of ChatGPT on the writing style of condensed matter physicists
Authors: Shaojun Xu, Xiaohui Ye, Mengqi Zhang, Pei Wang
Abstract: We apply a state-of-the-art difference-in-differences approach to estimate the impact of ChatGPT's release on the writing style of condensed matter papers on arXiv. Our analysis reveals a statistically significant improvement in the English quality of abstracts written by non-native English speakers. Importantly, this improvement remains robust even after accounting for other potential factors, confirming that it can be attributed to the release of ChatGPT. This indicates widespread adoption of the tool. Following the release of ChatGPT, there is a significant increase in the use of unique words, while the frequency of rare words decreases. Across language families, the changes in writing style are significant for authors from the Latin and Ural-Altaic groups, but not for those from the Germanic or other Indo-European groups.
Submitted 30 August, 2024; originally announced August 2024.
Comments: 9 pages, 1 figure, 7 tables

arXiv:2408.16975 [pdf, other] | q-bio.BM (Biomolecules), cs.AI (Artificial Intelligence), cs.LG (Machine Learning)
Title: Technical Report of HelixFold3 for Biomolecular Structure Prediction
Authors: Lihang Liu, Shanzhuo Zhang, Yang Xue, Xianbin Ye, Kunrui Zhu, Yuxin Li, Yang Liu, Jie Gao, Wenlai Zhao, Hongkun Yu, Zhihua Wu, Xiaonan Zhang, Xiaomin Fang
Abstract: The AlphaFold series has transformed protein structure prediction with remarkable accuracy, often matching experimental methods. AlphaFold2, AlphaFold-Multimer, and the latest AlphaFold3 represent significant strides in predicting single protein chains, protein complexes, and biomolecular structures. While AlphaFold2 and AlphaFold-Multimer are open-sourced, facilitating rapid and reliable predictions, AlphaFold3 remains partially accessible through a limited online server and has not been open-sourced, restricting further development. To address these challenges, the PaddleHelix team is developing HelixFold3, aiming to replicate AlphaFold3's capabilities. Leveraging insights from previous models and extensive datasets, HelixFold3 achieves accuracy comparable to AlphaFold3 in predicting the structures of conventional ligands, nucleic acids, and proteins. The initial release of HelixFold3 is available as open source on GitHub for academic research, promising to advance biomolecular research and accelerate discoveries. The latest version will be continuously updated on the HelixFold3 web server, providing both interactive visualization and API access.
Submitted 22 December, 2024; v1 submitted 29 August, 2024; originally announced August 2024.

Pages: 1 2 3 4 5 ... (Next: /search/?searchtype=author&query=Ye%2C+X&start=50)
