Search | arXiv e-print repository

<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;50 of 204 results for author: <span class="mathjax">Gong, C</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&amp;query=Gong%2C+C">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Gong, C"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Gong%2C+C&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Gong, C"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Gong%2C+C&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Gong%2C+C&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Gong%2C+C&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Gong%2C+C&amp;start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Gong%2C+C&amp;start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Gong%2C+C&amp;start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.14487">arXiv:2411.14487</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.14487">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> Ensuring Safety and Trust: Analyzing the Risks of Large Language Models in Medicine </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yang%2C+Y">Yifan Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Jin%2C+Q">Qiao Jin</a>, <a href="/search/cs?searchtype=author&amp;query=Leaman%2C+R">Robert Leaman</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+X">Xiaoyu Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Xiong%2C+G">Guangzhi Xiong</a>, <a href="/search/cs?searchtype=author&amp;query=Sarfo-Gyamfi%2C+M">Maame Sarfo-Gyamfi</a>, <a href="/search/cs?searchtype=author&amp;query=Gong%2C+C">Changlin Gong</a>, <a href="/search/cs?searchtype=author&amp;query=Ferri%C3%A8re-Steinert%2C+S">Santiago Ferri猫re-Steinert</a>, <a href="/search/cs?searchtype=author&amp;query=Wilbur%2C+W+J">W. 
John Wilbur</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+X">Xiaojun Li</a>, <a href="/search/cs?searchtype=author&amp;query=Yuan%2C+J">Jiaxin Yuan</a>, <a href="/search/cs?searchtype=author&amp;query=An%2C+B">Bang An</a>, <a href="/search/cs?searchtype=author&amp;query=Castro%2C+K+S">Kelvin S. Castro</a>, <a href="/search/cs?searchtype=author&amp;query=%C3%81lvarez%2C+F+E">Francisco Erramuspe 脕lvarez</a>, <a href="/search/cs?searchtype=author&amp;query=Stockle%2C+M">Mat铆as Stockle</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+A">Aidong Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+F">Furong Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Lu%2C+Z">Zhiyong Lu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.14487v1-abstract-short" style="display: inline;"> The remarkable capabilities of Large Language Models (LLMs) make them increasingly compelling for adoption in real-world healthcare applications. However, the risks associated with using LLMs in medical applications have not been systematically characterized. We propose using five key principles for safe and trustworthy medical AI: Truthfulness, Resilience, Fairness, Robustness, and Privacy, along&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14487v1-abstract-full').style.display = 'inline'; document.getElementById('2411.14487v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.14487v1-abstract-full" style="display: none;"> The remarkable capabilities of Large Language Models (LLMs) make them increasingly compelling for adoption in real-world healthcare applications. However, the risks associated with using LLMs in medical applications have not been systematically characterized. We propose using five key principles for safe and trustworthy medical AI: Truthfulness, Resilience, Fairness, Robustness, and Privacy, along with ten specific aspects. Under this comprehensive framework, we introduce a novel MedGuard benchmark with 1,000 expert-verified questions. Our evaluation of 11 commonly used LLMs shows that the current language models, regardless of their safety alignment mechanisms, generally perform poorly on most of our benchmarks, particularly when compared to the high performance of human physicians. Despite recent reports indicate that advanced LLMs like ChatGPT can match or even exceed human performance in various medical tasks, this study underscores a significant safety gap, highlighting the crucial need for human oversight and the implementation of AI safety guardrails. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14487v1-abstract-full').style.display = 'none'; document.getElementById('2411.14487v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.19834">arXiv:2410.19834</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.19834">pdf</a>, <a href="https://arxiv.org/format/2410.19834">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> GNNRL-Smoothing: A Prior-Free Reinforcement Learning Model for Mesh Smoothing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Z">Zhichao Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+X">Xinhai Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Gong%2C+C">Chunye Gong</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+B">Bo Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Deng%2C+L">Liang Deng</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+Y">Yufei Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Pang%2C+Y">Yufei Pang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+J">Jie Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.19834v1-abstract-short" style="display: inline;"> Mesh smoothing methods can enhance mesh quality by eliminating distorted elements, leading to improved convergence in simulations. To balance the efficiency and robustness of traditional mesh smoothing process, previous approaches have employed supervised learning and reinforcement learning to train intelligent smoothing models. However, these methods heavily rely on labeled dataset or prior knowl&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.19834v1-abstract-full').style.display = 'inline'; document.getElementById('2410.19834v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.19834v1-abstract-full" style="display: none;"> Mesh smoothing methods can enhance mesh quality by eliminating distorted elements, leading to improved convergence in simulations. To balance the efficiency and robustness of traditional mesh smoothing process, previous approaches have employed supervised learning and reinforcement learning to train intelligent smoothing models. However, these methods heavily rely on labeled dataset or prior knowledge to guide the models&#39; learning. Furthermore, their limited capacity to enhance mesh connectivity often restricts the effectiveness of smoothing. In this paper, we first systematically analyze the learning mechanisms of recent intelligent smoothing methods and propose a prior-free reinforcement learning model for intelligent mesh smoothing. Our proposed model integrates graph neural networks with reinforcement learning to implement an intelligent node smoothing agent and introduces, for the first time, a mesh connectivity improvement agent. 
We formalize mesh optimization as a Markov Decision Process and successfully train both agents using Twin Delayed Deep Deterministic Policy Gradient and Double Dueling Deep Q-Network in the absence of any prior data or knowledge. We verified the proposed model on both 2D and 3D meshes. Experimental results demonstrate that our model achieves feature-preserving smoothing on complex 3D surface meshes. It also achieves state-of-the-art results among intelligent smoothing methods on 2D meshes and is 7.16 times faster than traditional optimization-based smoothing methods. Moreover, the connectivity improvement agent can effectively enhance the quality distribution of the mesh. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.19834v1-abstract-full').style.display = 'none'; document.getElementById('2410.19834v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.18378">arXiv:2410.18378</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.18378">pdf</a>, <a href="https://arxiv.org/format/2410.18378">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Delta: A Cloud-assisted Data Enrichment Framework for On-Device Continual Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Gong%2C+C">Chen Gong</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+Z">Zhenzhe Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+F">Fan Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Jia%2C+X">Xiaofeng Jia</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+G">Guihai Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.18378v1-abstract-short" style="display: inline;"> In modern mobile applications, users frequently encounter various new contexts, necessitating on-device continual learning (CL) to ensure consistent model performance. While existing research predominantly focused on developing lightweight CL frameworks, we identify that data scarcity is a critical bottleneck for on-device CL. In this work, we explore the potential of leveraging abundant cloud-sid&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18378v1-abstract-full').style.display = 'inline'; document.getElementById('2410.18378v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.18378v1-abstract-full" style="display: none;"> In modern mobile applications, users frequently encounter various new contexts, necessitating on-device continual learning (CL) to ensure consistent model performance. While existing research predominantly focused on developing lightweight CL frameworks, we identify that data scarcity is a critical bottleneck for on-device CL. 
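   As a concrete illustration of the MDP framing this abstract mentions, here is a minimal Python sketch of a single node-smoothing step: the state is a free node and its one-ring neighbors, the action is a displacement, and the reward is the change in a quality proxy. The quality metric, state encoding, and action below are illustrative assumptions, not the paper's GNN/TD3 agent.

```python
import numpy as np

def ring_quality(node, neighbors):
    """Quality proxy: negative variance of incident edge lengths
    (uniform edges around a node are one sign of a well-shaped patch)."""
    lengths = np.linalg.norm(neighbors - node, axis=1)
    return -np.var(lengths)

def smoothing_step(node, neighbors, action):
    """One MDP transition: apply a displacement, reward = quality gain."""
    new_node = node + action
    reward = ring_quality(new_node, neighbors) - ring_quality(node, neighbors)
    state = np.concatenate([new_node, (neighbors - new_node).ravel()])
    return state, reward, new_node

neighbors = np.array([[0.0, 0.0], [2.0, 0.1], [1.9, 2.0], [0.1, 1.8]])
node = np.array([0.3, 0.4])                      # poorly placed free node
centroid_move = neighbors.mean(axis=0) - node    # Laplacian-style action
_, reward, _ = smoothing_step(node, neighbors, 0.5 * centroid_move)
print(f"reward for moving toward centroid: {reward:.4f}")
```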
3. arXiv:2410.18378 [pdf, other] (cs.LG)
   Delta: A Cloud-assisted Data Enrichment Framework for On-Device Continual Learning
   Authors: Chen Gong, Zhenzhe Zheng, Fan Wu, Xiaofeng Jia, Guihai Chen
   Abstract: In modern mobile applications, users frequently encounter various new contexts, necessitating on-device continual learning (CL) to ensure consistent model performance. While existing research has predominantly focused on developing lightweight CL frameworks, we identify that data scarcity is a critical bottleneck for on-device CL. In this work, we explore the potential of leveraging abundant cloud-side data to enrich scarce on-device data, and propose a private, efficient and effective data enrichment framework, Delta. Specifically, Delta first introduces a directory dataset to decompose the data enrichment problem into device-side and cloud-side sub-problems without sharing sensitive data. Next, Delta proposes a soft data matching strategy to effectively solve the device-side sub-problem with sparse user data, and an optimal data sampling scheme for the cloud server to retrieve the most suitable dataset for enrichment with low computational complexity. Further, Delta refines the data sampling scheme by jointly considering the impact of enriched data on both new and past contexts, mitigating the catastrophic forgetting issue from a new aspect. Comprehensive experiments across four typical mobile computing tasks with varied data modalities demonstrate that Delta can enhance overall model accuracy by an average of 15.1%, 12.4%, 1.1% and 5.6% for visual, IMU, audio and textual tasks compared with few-shot CL, while consistently reducing communication costs by over 90% compared to federated CL.
   Submitted 23 October, 2024; originally announced October 2024.
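   A hedged sketch of the general shape of "device publishes a coarse descriptor, cloud soft-matches and samples enrichment data". Delta's actual directory dataset, matching strategy, and sampling scheme are more involved; the centroid descriptor and softmax weighting here are assumptions for illustration only.

```python
import numpy as np

rng = np.random.default_rng(0)
device_feats = rng.normal(0.0, 1.0, size=(20, 8))    # scarce on-device features
cloud_feats = rng.normal(0.5, 1.0, size=(1000, 8))   # abundant cloud-side pool

# Device-side: publish only a coarse centroid (a stand-in for the directory),
# never the raw samples.
directory = device_feats.mean(axis=0)

# Cloud-side: soft matching via a softmax over cosine similarity, then sample.
sims = cloud_feats @ directory / (
    np.linalg.norm(cloud_feats, axis=1) * np.linalg.norm(directory) + 1e-9)
temperature = 0.1
weights = np.exp(sims / temperature)
weights /= weights.sum()
enrich_idx = rng.choice(len(cloud_feats), size=50, replace=False, p=weights)
print("picked", len(enrich_idx), "cloud samples for enrichment")
```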
4. arXiv:2410.13080 [pdf, other] (cs.CL)
   Graph-constrained Reasoning: Faithful Reasoning on Knowledge Graphs with Large Language Models
   Authors: Linhao Luo, Zicheng Zhao, Chen Gong, Gholamreza Haffari, Shirui Pan
   Abstract: Large language models (LLMs) have demonstrated impressive reasoning abilities, but they still struggle with faithful reasoning due to knowledge gaps and hallucinations. To address these issues, knowledge graphs (KGs) have been utilized to enhance LLM reasoning through their structured knowledge. However, existing KG-enhanced methods, whether retrieval-based or agent-based, encounter difficulties in accurately retrieving knowledge and efficiently traversing KGs at scale. In this work, we introduce graph-constrained reasoning (GCR), a novel framework that bridges structured knowledge in KGs with unstructured reasoning in LLMs. To eliminate hallucinations, GCR ensures faithful KG-grounded reasoning by integrating KG structure into the LLM decoding process through KG-Trie, a trie-based index that encodes KG reasoning paths. KG-Trie constrains the decoding process, allowing LLMs to reason directly on graphs and generate faithful reasoning paths grounded in KGs. Additionally, GCR leverages a lightweight KG-specialized LLM for graph-constrained reasoning alongside a powerful general LLM for inductive reasoning over multiple reasoning paths, resulting in accurate reasoning with zero reasoning hallucination. Extensive experiments on several KGQA benchmarks demonstrate that GCR achieves state-of-the-art performance and exhibits strong zero-shot generalizability to unseen KGs without additional training.
   Submitted 16 October, 2024; originally announced October 2024.
   Comments: 21 pages, 10 figures
5. arXiv:2410.10547 [pdf, other] (cs.CV, cs.AI)
   Hybrid Transformer for Early Alzheimer's Detection: Integration of Handwriting-Based 2D Images and 1D Signal Features
   Authors: Changqing Gong, Huafeng Qin, Mounîm A. El-Yacoubi
   Abstract: Alzheimer's Disease (AD) is a prevalent neurodegenerative condition where early detection is vital. Handwriting, often affected early in AD, offers a non-invasive and cost-effective way to capture subtle motor changes. State-of-the-art research on handwriting-based AD detection, mostly online, has predominantly relied on manually extracted features fed as input to shallow machine learning models. Some recent works have proposed deep learning (DL)-based models, either 1D-CNN or 2D-CNN architectures, with performance comparing favorably to handcrafted schemes. These approaches, however, overlook the intrinsic relationship between the 2D spatial patterns of handwriting strokes and their 1D dynamic characteristics, thus limiting their capacity to capture the multimodal nature of handwriting data. Moreover, the application of Transformer models remains largely unexplored. To address these limitations, we propose a novel approach for AD detection, consisting of a learnable multimodal hybrid attention model that simultaneously integrates 2D handwriting images with 1D dynamic handwriting signals. Our model leverages a gated mechanism to combine similarity and difference attention, blending the two modalities and learning robust features by incorporating information at different scales. Our model achieved state-of-the-art performance on the DARWIN dataset, with an F1-score of 90.32% and accuracy of 90.91% on Task 8 ('L' writing), surpassing the previous best by 4.61% and 6.06% respectively.
   Submitted 14 October, 2024; originally announced October 2024.
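   A hedged sketch of the gating idea in this abstract: blend a "similarity" branch and a "difference" branch of the two modality features with a learned gate. The paper's attention blocks and multi-scale design are richer; the branch definitions and dimensions below are assumptions.

```python
import torch
import torch.nn as nn

class GatedFusion(nn.Module):
    def __init__(self, dim=64):
        super().__init__()
        self.gate = nn.Sequential(nn.Linear(2 * dim, dim), nn.Sigmoid())

    def forward(self, img_feat, sig_feat):
        sim = img_feat * sig_feat            # similarity-style interaction
        diff = img_feat - sig_feat           # difference-style interaction
        g = self.gate(torch.cat([sim, diff], dim=-1))
        return g * sim + (1 - g) * diff      # gated blend of the two views

img_feat = torch.randn(8, 64)                # 2D-image branch features
sig_feat = torch.randn(8, 64)                # 1D-signal branch features
print(GatedFusion()(img_feat, sig_feat).shape)   # torch.Size([8, 64])
```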
6. arXiv:2410.07538 [pdf, other] (cs.LG)
   Rank Aggregation in Crowdsourcing for Listwise Annotations
   Authors: Wenshui Luo, Haoyu Liu, Yongliang Ding, Tao Zhou, Sheng Wan, Runze Wu, Minmin Lin, Cong Zhang, Changjie Fan, Chen Gong
   Abstract: Rank aggregation through crowdsourcing has recently gained significant attention, particularly in the context of listwise ranking annotations. However, existing methods primarily focus on a single problem and partial ranks, while the aggregation of listwise full ranks across numerous problems remains largely unexplored. This scenario finds relevance in various applications, such as model quality assessment and reinforcement learning with human feedback. In light of practical needs, we propose LAC, a Listwise rank Aggregation method in Crowdsourcing, where the global position information is carefully measured and included. In our design, a specially proposed annotation quality indicator is employed to measure the discrepancy between the annotated rank and the true rank. We also take the difficulty of the ranking problem itself into consideration, as it directly impacts the performance of annotators and consequently influences the final results. To our knowledge, LAC is the first work to directly deal with the full rank aggregation problem in listwise crowdsourcing, and simultaneously infer the difficulty of problems, the ability of annotators, and the ground-truth ranks in an unsupervised way. To evaluate our method, we collect a real-world business-oriented dataset for paragraph ranking. Experimental results on both synthetic and real-world benchmark datasets demonstrate the effectiveness of our proposed LAC method.
   Submitted 9 October, 2024; originally announced October 2024.
   Comments: 19 pages
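   To make the unsupervised-aggregation idea concrete, here is a minimal sketch in the spirit of (but not identical to) LAC: alternate between forming a consensus rank from reliability-weighted average positions and re-estimating each annotator's reliability from pairwise agreement with that consensus. The update rules are illustrative assumptions, not the paper's model.

```python
import numpy as np

def consensus(ranks, w):
    """ranks: (annotators, items) matrix of positions; w: reliabilities."""
    mean_pos = np.average(ranks, axis=0, weights=w)
    return np.argsort(np.argsort(mean_pos))   # consensus position of each item

def pairwise_agreement(r1, r2):
    """Fraction of item pairs ordered the same way by both rankings."""
    n = len(r1)
    pairs = [(i, j) for i in range(n) for j in range(i + 1, n)]
    same = sum((r1[i] < r1[j]) == (r2[i] < r2[j]) for i, j in pairs)
    return same / len(pairs)

ranks = np.array([[0, 1, 2, 3],    # careful annotator
                  [0, 2, 1, 3],    # minor mistake
                  [3, 2, 1, 0]])   # adversarial / confused annotator
w = np.ones(len(ranks))
for _ in range(5):                 # a few alternating updates suffice here
    c = consensus(ranks, w)
    w = np.array([pairwise_agreement(r, c) for r in ranks]) + 1e-6
print("consensus positions:", c, "reliabilities:", np.round(w, 2))
```

   On this toy input the confused annotator's reliability collapses toward zero, so the consensus follows the two careful annotators.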
7. arXiv:2410.00455 [pdf, other] (cs.DC)
   Fine-Grained Vectorized Merge Sorting on RISC-V: From Register to Cache
   Authors: Jin Zhang, Jincheng Zhou, Xiang Zhang, Di Ma, Chunye Gong
   Abstract: Merge sort, as a divide-sort-merge paradigm, has been widely applied across computer science. As modern reduced-instruction-set architectures like fifth-generation RISC-V treat multiple registers as a vector register group for wide instruction parallelism, optimizing merge sort for this vectorized property is becoming increasingly common. In this paper, we overhaul the divide-sort-merge paradigm, from its register-level sort to the cache-aware merge, to develop a fine-grained RISC-V vectorized merge sort (RVMS). From the register-level view, the inline vectorized transpose instruction is missing in RISC-V, so implementing it efficiently is non-trivial. Besides, vectorized comparisons do not always work well in the merging networks. Both issues primarily stem from the expensive data-shuffle instruction. To bypass it, RVMS takes register data as the proxy of data shuffle to accelerate the transpose operation, and replaces vectorized comparisons with their scalar cousins for a lighter real-value swap. On the other hand, as the cache-aware merge performs larger merges in the cache, most merge schemes have two drawbacks: the in-cache merge usually has low cache utilization, while the out-of-cache merging network retains an ineffectively symmetric structure. To this end, we propose a half-merge scheme that employs the auxiliary space of in-place merge to halve the footprint of naive merge sort, and meanwhile copies one sequence into this space to avoid the former data exchange. Furthermore, an asymmetric merging network is developed to adapt to two different input sizes.
   Submitted 1 October, 2024; originally announced October 2024.
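   The half-merge idea in this abstract is a classic buffer trick, sketched below in scalar Python (no RISC-V vector instructions): to merge two adjacent sorted runs in one array, copy only the first run into an auxiliary buffer, then merge the buffer and the second run back into the original span. The buffer is half the size a naive merge would need.

```python
def half_merge(a, lo, mid, hi):
    """Merge sorted runs a[lo:mid] and a[mid:hi] in place, using
    auxiliary space of size (mid - lo) instead of (hi - lo)."""
    buf = a[lo:mid]              # copy of the first run only
    i, j, k = 0, mid, lo
    while i < len(buf) and j < hi:
        if buf[i] <= a[j]:
            a[k] = buf[i]; i += 1
        else:
            a[k] = a[j]; j += 1
        k += 1
    # Any leftover buffered elements go at the end; leftovers of the
    # second run are already in their final positions.
    a[k:k + len(buf) - i] = buf[i:]

data = [1, 4, 7, 9, 2, 3, 8, 10]
half_merge(data, 0, 4, 8)
print(data)   # [1, 2, 3, 4, 7, 8, 9, 10]
```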
8. arXiv:2409.18512 [pdf, other] (cs.SD, cs.AI, cs.CL, eess.AS)
   EmoPro: A Prompt Selection Strategy for Emotional Expression in LM-based Speech Synthesis
   Authors: Haoyu Wang, Chunyu Qiang, Tianrui Wang, Cheng Gong, Qiuyu Liu, Yu Jiang, Xiaobao Wang, Chenyang Wang, Chen Zhang
   Abstract: Recent advancements in speech synthesis models, trained on extensive datasets, have demonstrated remarkable zero-shot capabilities. These models can control content, timbre, and emotion in generated speech based on prompt inputs. Despite these advancements, the choice of prompts significantly impacts the output quality, yet most existing selection schemes do not adequately address the control of emotional intensity. To address this question, this paper proposes a two-stage prompt selection strategy, EmoPro, which is specifically designed for emotionally controllable speech synthesis. This strategy focuses on selecting highly expressive and high-quality prompts by evaluating them from four perspectives: emotional expression strength, speech quality, text-emotion consistency, and model generation performance. Experimental results show that prompts selected with the proposed method yield more emotionally expressive and engaging synthesized speech than those obtained with the baseline. Audio samples and code will be available at https://whyrrrrun.github.io/EmoPro/.
   Submitted 27 September, 2024; originally announced September 2024.
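   Purely illustrative: EmoPro's four perspectives are scored by real models (emotion classifiers, quality predictors, and so on), and its two stages are more elaborate. The sketch below reduces the idea to filter-then-rank over stubbed scores; the field names, weights, and threshold are assumptions.

```python
def select_prompt(candidates, weights=(0.4, 0.2, 0.2, 0.2), quality_floor=0.6):
    """candidates: list of dicts with the four perspective scores in [0, 1]."""
    keys = ("emotion_strength", "speech_quality",
            "text_emotion_consistency", "generation_performance")
    # Stage 1: discard prompts whose speech quality is too low.
    pool = [c for c in candidates if c["speech_quality"] >= quality_floor]
    # Stage 2: rank the survivors by a weighted sum of the four scores.
    return max(pool, key=lambda c: sum(w * c[k] for w, k in zip(weights, keys)))

candidates = [
    {"id": "p1", "emotion_strength": 0.9, "speech_quality": 0.5,
     "text_emotion_consistency": 0.9, "generation_performance": 0.8},
    {"id": "p2", "emotion_strength": 0.8, "speech_quality": 0.8,
     "text_emotion_consistency": 0.7, "generation_performance": 0.9},
]
print(select_prompt(candidates)["id"])   # p1 is filtered out; picks p2
```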
9. arXiv:2409.05249 [pdf, other] (cs.CR, cs.DB, cs.NI)
   NetDPSyn: Synthesizing Network Traces under Differential Privacy
   Authors: Danyu Sun, Joann Qiongna Chen, Chen Gong, Tianhao Wang, Zhou Li
   Abstract: As the use of network traces in network measurement research becomes increasingly prevalent, concerns about privacy leakage from network traces have garnered public attention. To safeguard network traces, researchers have proposed trace synthesis that retains the essential properties of the raw data. However, previous works have also shown that synthetic traces produced by generative models are vulnerable to linkage attacks. This paper introduces NetDPSyn, the first system to synthesize high-fidelity network traces under privacy guarantees. NetDPSyn is built with the Differential Privacy (DP) framework at its core, which differs significantly from prior works that apply DP when training the generative model. Experiments conducted on three flow and two packet datasets indicate that NetDPSyn achieves much better data utility in downstream tasks such as anomaly detection. NetDPSyn is also 2.5 times faster than the other methods on average in data synthesis.
   Submitted 8 September, 2024; originally announced September 2024.
   Comments: IMC 2024
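   A minimal sketch of the general DP-synthesis recipe this kind of system builds on: add calibrated noise to aggregate counts, then sample synthetic records from the noisy counts. The real system handles multi-attribute marginals, consistency enforcement, and record assembly, all omitted here; the single "port" field and parameters are assumptions.

```python
import numpy as np

rng = np.random.default_rng(1)
ports = rng.choice([53, 80, 443], size=10_000, p=[0.2, 0.3, 0.5])  # raw field

values, counts = np.unique(ports, return_counts=True)
epsilon, sensitivity = 1.0, 1.0     # adding/removing one record shifts one count by 1
noisy = counts + rng.laplace(0.0, sensitivity / epsilon, size=len(counts))
noisy = np.clip(noisy, 0, None)     # post-processing preserves the DP guarantee

# Sample synthetic records from the noisy histogram.
probs = noisy / noisy.sum()
synthetic = rng.choice(values, size=10_000, p=probs)
vals, cnts = np.unique(synthetic, return_counts=True)
print(dict(zip(vals.tolist(), cnts.tolist())))
```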
10. arXiv:2409.03970 [pdf, other] (cs.DC, cs.DS)
    A Hybrid Vectorized Merge Sort on ARM NEON
    Authors: Jincheng Zhou, Jin Zhang, Xiang Zhang, Tiaojie Xiao, Di Ma, Chunye Gong
    Abstract: Sorting algorithms are among the most extensively researched topics in computer science and serve numerous practical applications. Although various sorts have been proposed for efficiency, different architectures offer distinct flavors to the implementation of parallel sorting. In this paper, we propose a hybrid vectorized merge sort on ARM NEON, named NEON Merge Sort (NEON-MS) for short. In detail, according to the granted register functions, we first identify the optimal register number to avoid register-to-memory accesses caused by the write-back of intermediate outcomes. More importantly, following the generic merge sort framework that primarily uses a sorting network for column sort and merging networks for three types of vectorized merge, we further improve their structures for high efficiency in a unified, asymmetric way: 1) it makes optimal sorting networks with few comparators possible; 2) a hybrid implementation of both serial and vectorized merges yields a pipeline in which merge instructions are highly interleaved. Experiments on a single FT2000+ core show that NEON-MS is 3.8 and 2.1 times faster than std::sort and boost::block_sort, respectively, on average. Additionally, compared to the parallel version of the latter, NEON-MS gains an average speedup of 1.25.
    Submitted 5 September, 2024; originally announced September 2024.
    Comments: Accepted by ICA3PP
T&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.10653v1-abstract-full').style.display = 'inline'; document.getElementById('2408.10653v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.10653v1-abstract-full" style="display: none;"> Underwater image enhancement (UIE) plays a crucial role in various marine applications, but it remains challenging due to the complex underwater environment. Current learning-based approaches frequently lack explicit incorporation of prior knowledge about the physical processes involved in underwater image formation, resulting in limited optimization despite their impressive enhancement results. This paper proposes a novel deep unfolding network (DUN) for UIE that integrates color priors and inter-stage feature transformation to improve enhancement performance. The proposed DUN model combines the iterative optimization and reliability of model-based methods with the flexibility and representational power of deep learning, offering a more explainable and stable solution compared to existing learning-based UIE approaches. The proposed model consists of three key components: a Color Prior Guidance Block (CPGB) that establishes a mapping between color channels of degraded and original images, a Nonlinear Activation Gradient Descent Module (NAGDM) that simulates the underwater image degradation process, and an Inter Stage Feature Transformer (ISF-Former) that facilitates feature exchange between different network stages. By explicitly incorporating color priors and modeling the physical characteristics of underwater image formation, the proposed DUN model achieves more accurate and reliable enhancement results. Extensive experiments on multiple underwater image datasets demonstrate the superiority of the proposed model over state-of-the-art methods in both quantitative and qualitative evaluations. The proposed DUN-based approach offers a promising solution for UIE, enabling more accurate and reliable scientific analysis in marine research. The code is available at https://github.com/CXH-Research/UIE-UnFold. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.10653v1-abstract-full').style.display = 'none'; document.getElementById('2408.10653v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. 
Comments: Accepted by DSAA CIVIL 2024
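A deep unfolding network of this kind alternates a data-fidelity update with a learned refinement at every stage. The PyTorch sketch below shows that generic pattern only; the stage module, fidelity term, and step size are illustrative stand-ins, not the paper's CPGB/NAGDM/ISF-Former blocks.

```python
import torch
import torch.nn as nn

class UnfoldStage(nn.Module):
    def __init__(self, ch=3):
        super().__init__()
        self.step = nn.Parameter(torch.tensor(0.1))  # learnable step size
        self.prior = nn.Sequential(                  # stand-in for the learned prior
            nn.Conv2d(ch, 32, 3, padding=1), nn.ReLU(),
            nn.Conv2d(32, ch, 3, padding=1),
        )

    def forward(self, x, y):
        # gradient step on a simple fidelity term ||x - y||^2, then refinement
        x = x - self.step * (x - y)
        return x + self.prior(x)

y = torch.rand(1, 3, 64, 64)   # degraded underwater image
x = y.clone()
for stage in nn.ModuleList([UnfoldStage() for _ in range(3)]):
    x = stage(x, y)            # unrolled iterative optimization
```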
arXiv:2408.10264 [pdf, other] cs.LG cs.AI cs.IR
OPDR: Order-Preserving Dimension Reduction for Semantic Embedding of Multimodal Scientific Data
Authors: Chengyu Gong, Gefei Shen, Luanzheng Guo, Nathan Tallent, Dongfang Zhao
Abstract: One of the most common operations in multimodal scientific data management is searching for the $k$ most similar items (or, $k$-nearest neighbors, KNN) from the database after being provided a new item. Although recent advances in multimodal machine learning models offer a semantic index, the so-called embedding vectors mapped from the original multimodal data, the dimension of the resulting embedding vectors is usually on the order of hundreds or a thousand, which is impractically high for time-sensitive scientific applications. This work proposes to reduce the dimensionality of the output embedding vectors such that the set of top-$k$ nearest neighbors does not change in the lower-dimensional space, namely Order-Preserving Dimension Reduction (OPDR). In order to develop such an OPDR method, our central hypothesis is that by analyzing the intrinsic relationship among key parameters during the dimension-reduction map, a quantitative function may be constructed to reveal the correlation between the target (lower) dimensionality and other variables. To demonstrate the hypothesis, this paper first defines a formal measure function to quantify the KNN similarity for a specific vector, then extends the measure into an aggregate accuracy of the global metric spaces, and finally derives a closed-form function between the target (lower) dimensionality and other variables. We incorporate the closed-form function into popular dimension-reduction methods, various distance metrics, and embedding models.
Submitted 15 August, 2024; originally announced August 2024.
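The order-preservation criterion can be made concrete by measuring how much of each vector's top-k neighbor set survives the reduction map. A minimal NumPy sketch, assuming PCA-via-SVD as the reduction and a plain overlap ratio as the measure (both illustrative stand-ins for the paper's formal measure function):

```python
import numpy as np

def topk_sets(X, k):
    """Top-k neighbor index set for every row of X under Euclidean distance."""
    d = np.linalg.norm(X[:, None] - X[None, :], axis=-1)
    np.fill_diagonal(d, np.inf)          # a point is not its own neighbor
    return [set(row) for row in np.argsort(d, axis=1)[:, :k]]

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 256))          # high-dimensional embeddings
X_c = X - X.mean(0)
_, _, Vt = np.linalg.svd(X_c, full_matrices=False)
Z = X_c @ Vt[:32].T                      # target (lower) dimensionality: 32

before, after = topk_sets(X, 10), topk_sets(Z, 10)
overlap = np.mean([len(a & b) / 10 for a, b in zip(before, after)])
print(f"mean top-10 neighbor overlap after reduction: {overlap:.2f}")
```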
arXiv:2408.05803 [pdf, other] eess.IV cs.CV
doi: 10.1109/TMI.2024.3435450
Prototype Learning Guided Hybrid Network for Breast Tumor Segmentation in DCE-MRI
Authors: Lei Zhou, Yuzhong Zhang, Jiadong Zhang, Xuejun Qian, Chen Gong, Kun Sun, Zhongxiang Ding, Xing Wang, Zhenhui Li, Zaiyi Liu, Dinggang Shen
Abstract: Automated breast tumor segmentation on the basis of dynamic contrast-enhancement magnetic resonance imaging (DCE-MRI) has shown great promise in clinical practice, particularly for identifying the presence of breast disease. However, accurate segmentation of breast tumor is a challenging task, often necessitating the development of complex networks. To strike an optimal trade-off between computational costs and segmentation performance, we propose a hybrid network via the combination of convolution neural network (CNN) and transformer layers. Specifically, the hybrid network consists of an encoder-decoder architecture built by stacking convolution and deconvolution layers. Effective 3D transformer layers are then implemented after the encoder subnetworks to capture global dependencies between the bottleneck features. To improve the efficiency of the hybrid network, two parallel encoder subnetworks are designed for the decoder and the transformer layers, respectively. To further enhance the discriminative capability of the hybrid network, a prototype learning guided prediction module is proposed, where category-specific prototypical features are calculated through on-line clustering. All learned prototypical features are finally combined with the features from the decoder for tumor mask prediction. The experimental results on private and public DCE-MRI datasets demonstrate that the proposed hybrid network achieves superior performance over the state-of-the-art (SOTA) methods, while maintaining a balance between segmentation accuracy and computational cost. Moreover, we demonstrate that automatically generated tumor masks can be effectively applied to identify the HER2-positive subtype from the HER2-negative subtype with accuracy similar to the analysis based on manual tumor segmentation. The source code is available at https://github.com/ZhouL-lab/PLHN.
Submitted 11 August, 2024; originally announced August 2024.
Journal ref: IEEE Transactions on Medical Imaging, 2024
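Prototype-guided prediction of this flavor pools class prototypes from decoder features and compares each pixel against them. A short PyTorch sketch, assuming masked averaging in place of the paper's on-line clustering; shapes and the two-class setup are illustrative.

```python
import torch
import torch.nn.functional as F

feat = torch.randn(1, 64, 32, 32)                # decoder features (B, C, H, W)
mask = (torch.rand(1, 1, 32, 32) > 0.5).float()  # coarse tumor mask

# category-specific prototypes by masked averaging (stand-in for clustering)
proto_fg = (feat * mask).sum((2, 3)) / mask.sum((2, 3)).clamp(min=1)
proto_bg = (feat * (1 - mask)).sum((2, 3)) / (1 - mask).sum((2, 3)).clamp(min=1)

# per-pixel cosine similarity to each prototype -> 2-way logits
sims = torch.stack([
    F.cosine_similarity(feat, p[:, :, None, None], dim=1)
    for p in (proto_bg, proto_fg)
], dim=1)
pred = sims.argmax(dim=1)                        # refined tumor mask
```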
arXiv:2408.05758 [pdf, other] eess.AS cs.AI cs.CL cs.SD
VQ-CTAP: Cross-Modal Fine-Grained Sequence Representation Learning for Speech Processing
Authors: Chunyu Qiang, Wang Geng, Yi Zhao, Ruibo Fu, Tao Wang, Cheng Gong, Tianrui Wang, Qiuyu Liu, Jiangyan Yi, Zhengqi Wen, Chen Zhang, Hao Che, Longbiao Wang, Jianwu Dang, Jianhua Tao
Abstract: Deep learning has brought significant improvements to the field of cross-modal representation learning. For tasks such as text-to-speech (TTS), voice conversion (VC), and automatic speech recognition (ASR), a cross-modal fine-grained (frame-level) sequence representation is desired, emphasizing the semantic content of the text modality while de-emphasizing the paralinguistic information of the speech modality. We propose a method called "Vector Quantized Contrastive Token-Acoustic Pre-training (VQ-CTAP)", which uses a cross-modal aligned sequence transcoder to bring text and speech into a joint multimodal space, learning how to connect text and speech at the frame level. The proposed VQ-CTAP is a paradigm for cross-modal sequence representation learning, offering a promising solution for fine-grained generation and recognition tasks in speech processing. VQ-CTAP can be directly applied to VC and ASR tasks without fine-tuning or additional structures. We propose a sequence-aware semantic connector, which connects multiple frozen pre-trained modules for the TTS task, exhibiting a plug-and-play capability. We design a stepping optimization strategy to ensure effective model convergence by gradually injecting and adjusting the influence of various loss components. Furthermore, we propose a semantic-transfer-wise paralinguistic consistency loss to enhance representational capabilities, allowing the model to better generalize to unseen data and capture the nuances of paralinguistic information. In addition, VQ-CTAP achieves high-compression speech coding at a rate of 25 Hz from 24 kHz input waveforms, a 960-fold reduction in the sampling rate. The audio demo is available at https://qiangchunyu.github.io/VQCTAP/
Submitted 11 August, 2024; originally announced August 2024.
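Two ingredients named in the abstract, a vector-quantization bottleneck and frame-level contrastive alignment between text and speech, can be sketched compactly. The codebook size, feature dimensions, and temperature below are illustrative assumptions, not the paper's configuration.

```python
import torch
import torch.nn.functional as F

codebook = torch.randn(512, 256)                 # 512 codes, 256-dim each

def quantize(z):
    """Nearest-code lookup: map each frame to its closest codebook entry."""
    idx = torch.cdist(z, codebook).argmin(-1)
    return codebook[idx], idx

text_seq = torch.randn(100, 256)                 # frame-aligned text features
speech_seq = torch.randn(100, 256)               # speech encoder outputs
q_speech, _ = quantize(speech_seq)               # VQ bottleneck on speech

# CLIP-style frame-level contrast: matching frames are positives
logits = F.normalize(text_seq, dim=-1) @ F.normalize(q_speech, dim=-1).T / 0.07
labels = torch.arange(100)
loss = F.cross_entropy(logits, labels)           # token-acoustic contrastive loss
```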
arXiv:2408.01784 [pdf, other] cs.IR
Graph Stochastic Neural Process for Inductive Few-shot Knowledge Graph Completion
Authors: Zicheng Zhao, Linhao Luo, Shirui Pan, Chengqi Zhang, Chen Gong
Abstract: Knowledge graphs (KGs) store enormous facts as relationships between entities. Due to the long-tailed distribution of relations and the incompleteness of KGs, there is growing interest in few-shot knowledge graph completion (FKGC). Existing FKGC methods often assume the existence of all entities in KGs, which may not be practical since new relations and entities can emerge over time. Therefore, we focus on a more challenging task called inductive few-shot knowledge graph completion (I-FKGC), where both relations and entities during the test phase are unseen beforehand. Inspired by the idea of inductive reasoning, we cast I-FKGC as an inductive reasoning problem. Specifically, we propose a novel Graph Stochastic Neural Process approach (GS-NP), which consists of two major modules. In the first module, to obtain a generalized hypothesis (e.g., a shared subgraph), we present a neural process-based hypothesis extractor that models the joint distribution of hypotheses, from which we can sample a hypothesis for predictions. In the second module, based on the hypothesis, we propose a graph stochastic attention-based predictor to test whether a triple in the query set aligns with the extracted hypothesis. Meanwhile, the predictor can generate an explanatory subgraph identified by the hypothesis. Finally, the training of these two modules is seamlessly combined into a unified objective function, whose effectiveness is verified by theoretical analyses as well as empirical studies. Extensive experiments on three public datasets demonstrate that our method outperforms existing methods and achieves new state-of-the-art performance.
Submitted 3 August, 2024; originally announced August 2024.
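The neural-process pattern behind the hypothesis extractor encodes a support set into a latent distribution, samples a hypothesis, and scores queries against it. A hedged sketch with stand-in encoders and a bilinear scorer; none of the modules below are the paper's graph-based components.

```python
import torch
import torch.nn as nn

dim = 64
enc = nn.Linear(3 * dim, 2 * dim)            # support triple -> (mu, logvar)
scorer = nn.Bilinear(dim, 3 * dim, 1)        # hypothesis x query -> logit

support = torch.randn(5, 3 * dim)            # few-shot (h, r, t) embeddings
mu, logvar = enc(support).mean(0).chunk(2)   # aggregate into a joint distribution
z = mu + torch.randn_like(mu) * (0.5 * logvar).exp()  # sample a hypothesis

query = torch.randn(1, 3 * dim)
p_true = torch.sigmoid(scorer(z.unsqueeze(0), query))  # does the query fit z?
```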
arXiv:2408.01548 [pdf, other] cs.CV
Trainable Pointwise Decoder Module for Point Cloud Segmentation
Authors: Bike Chen, Chen Gong, Antti Tikanmäki, Juha Röning
Abstract: Point cloud segmentation (PCS) aims to make per-point predictions and enables robots and autonomous driving cars to understand the environment. The range image is a dense representation of a large-scale outdoor point cloud, and segmentation models built upon the image commonly execute efficiently. However, the projection of the point cloud onto the range image inevitably leads to dropped points because, at each image coordinate, only one point is kept despite multiple points being projected onto the same location. More importantly, it is challenging to assign correct predictions to the dropped points that belong to classes different from the kept point's class. Besides, existing post-processing methods, such as K-nearest neighbor (KNN) search and kernel point convolution (KPConv), cannot be trained with the models in an end-to-end manner or cannot process varying-density outdoor point clouds well, leaving the models with sub-optimal performance. To alleviate this problem, we propose a trainable pointwise decoder module (PDM) as the post-processing approach, which gathers weighted features from the neighbors and then makes the final prediction for the query point. In addition, we introduce a virtual range image-guided copy-rotate-paste (VRCrop) strategy for data augmentation. VRCrop constrains the total number of points and eliminates undesirable artifacts in the augmented point cloud. With PDM and VRCrop, existing range image-based segmentation models consistently perform better than their counterparts on the SemanticKITTI, SemanticPOSS, and nuScenes datasets.
Submitted 2 August, 2024; originally announced August 2024.
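The core of such a pointwise decoder is gathering weighted neighbor features for each dropped point. A sketch under assumed sizes; k, the softmax weighting, and the linear head are illustrative, and the real PDM is trained end-to-end with the segmentation model.

```python
import torch

kept_xyz = torch.randn(1000, 3)            # points kept on the range image
kept_feat = torch.randn(1000, 32)          # their per-point features
query_xyz = torch.randn(50, 3)             # dropped points needing labels

d = torch.cdist(query_xyz, kept_xyz)       # (50, 1000) pairwise distances
dist, idx = d.topk(7, largest=False)       # k=7 nearest kept neighbors
w = torch.softmax(-dist, dim=1)            # closer neighbors weigh more
fused = (kept_feat[idx] * w.unsqueeze(-1)).sum(1)   # (50, 32) per query point

head = torch.nn.Linear(32, 20)             # 20 semantic classes
pred = head(fused).argmax(-1)              # final prediction per dropped point
```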
arXiv:2407.13986 [pdf, other] cs.CV
Deep Feature Surgery: Towards Accurate and Efficient Multi-Exit Networks
Authors: Cheng Gong, Yao Chen, Qiuyang Luo, Ye Lu, Tao Li, Yuzhi Zhang, Yufei Sun, Le Zhang
Abstract: Multi-exit networks are a promising architecture for efficient model inference, sharing backbone networks and weights among multiple exits. However, gradient conflict among the shared weights results in sub-optimal accuracy. This paper introduces Deep Feature Surgery (DFS), which consists of feature partitioning and feature referencing approaches to resolve gradient conflict during the training of multi-exit networks. Feature partitioning separates shared features along the depth axis among all exits to alleviate gradient conflict while simultaneously promoting joint optimization for each exit.
Subsequently, feature referencing enhances multi-scale features for distinct exits across varying depths to improve model accuracy. Furthermore, DFS reduces the training operations through the reduced complexity of backpropagation. Experimental results on the Cifar100 and ImageNet datasets show that DFS provides up to a 50.00% reduction in training time and attains up to a 6.94% enhancement in accuracy compared with baseline methods across diverse models and tasks. Budgeted batch classification evaluation on MSDNet demonstrates that DFS uses about 2x fewer average FLOPs per image to achieve the same classification accuracy as baseline methods on Cifar100. The code is available at https://github.com/GongCheng1919/dfs.
Submitted 9 August, 2024; v1 submitted 18 July, 2024; originally announced July 2024.
Comments: ECCV 2024
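Feature partitioning can be pictured as giving each exit its own slice of the shared feature tensor while still referencing the rest without gradient flow. A hedged sketch; the 50/50 split and detach-based referencing are assumptions for illustration, not the paper's exact mechanism.

```python
import torch
import torch.nn as nn

feat = torch.randn(8, 64, 16, 16, requires_grad=True)  # shared backbone features
own, shared = feat.split([32, 32], dim=1)               # partition along channels

exit_head = nn.Linear(64 * 16 * 16, 100)
# exit-specific slice keeps gradients; the referenced slice is detached,
# so this exit's loss cannot conflict with gradients on the other slice
x = torch.cat([own, shared.detach()], dim=1).flatten(1)
logits = exit_head(x)
```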
arXiv:2407.12857 [pdf, other] cs.CL cs.DL cs.IR
Automated Peer Reviewing in Paper SEA: Standardization, Evaluation, and Analysis
Authors: Jianxiang Yu, Zichen Ding, Jiaqi Tan, Kangyang Luo, Zhenmin Weng, Chenghua Gong, Long Zeng, Renjing Cui, Chengcheng Han, Qiushi Sun, Zhiyong Wu, Yunshi Lan, Xiang Li
Abstract: In recent years, the rapid increase in scientific papers has overwhelmed traditional review mechanisms, resulting in varying quality of publications. Although existing methods have explored the capabilities of Large Language Models (LLMs) for automated scientific reviewing, their generated contents are often generic or partial. To address the issues above, we introduce an automated paper reviewing framework SEA. It comprises three modules: Standardization, Evaluation, and Analysis, which are represented by models SEA-S, SEA-E, and SEA-A, respectively. Initially, SEA-S distills the data standardization capabilities of GPT-4 for integrating multiple reviews of a paper. Then, SEA-E utilizes the standardized data for fine-tuning, enabling it to generate constructive reviews. Finally, SEA-A introduces a new evaluation metric called mismatch score to assess the consistency between paper contents and reviews. Moreover, we design a self-correction strategy to enhance the consistency. Extensive experimental results on datasets collected from eight venues show that SEA can generate valuable insights for authors to improve their papers.
Submitted 1 October, 2024; v1 submitted 9 July, 2024; originally announced July 2024.
Comments: Accepted by EMNLP 2024
arXiv:2407.12383 [pdf, other] cs.CV
Reliable and Efficient Concept Erasure of Text-to-Image Diffusion Models
Authors: Chao Gong, Kai Chen, Zhipeng Wei, Jingjing Chen, Yu-Gang Jiang
Abstract: Text-to-image models encounter safety issues, including concerns related to copyright and Not-Safe-For-Work (NSFW) content. Although several methods have been proposed for erasing inappropriate concepts from diffusion models, they often exhibit incomplete erasure, consume substantial computing resources, and inadvertently damage generation ability. In this work, we introduce Reliable and Efficient Concept Erasure (RECE), a novel approach that modifies the model in 3 seconds without requiring additional fine-tuning. Specifically, RECE efficiently leverages a closed-form solution to derive new target embeddings, which are capable of regenerating erased concepts within the unlearned model. To mitigate inappropriate content potentially represented by the derived embeddings, RECE further aligns them with harmless concepts in cross-attention layers. The derivation and erasure of new representation embeddings are conducted iteratively to achieve a thorough erasure of inappropriate concepts. Besides, to preserve the model's generation ability, RECE introduces an additional regularization term during the derivation process, minimizing the impact on unrelated concepts during erasure. All the processes above are in closed form, guaranteeing extremely efficient erasure in only 3 seconds. Benchmarking against previous approaches, our method achieves more efficient and thorough erasure with minor damage to the original generation ability and demonstrates enhanced robustness against red-teaming tools. Code is available at https://github.com/CharlesGong12/RECE.
Submitted 28 October, 2024; v1 submitted 17 July, 2024; originally announced July 2024.
Comments: ECCV 2024 accepted
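The closed-form derivation step can be read as a least-squares problem: find the embedding that still reproduces the erased concept's attention output under the edited projection. A hedged NumPy sketch with illustrative shapes; the actual RECE derivation may differ in detail.

```python
import numpy as np

rng = np.random.default_rng(0)
W = rng.normal(size=(320, 768))        # original cross-attention K/V projection
W_edit = rng.normal(size=(320, 768))   # projection after a concept-erasing edit
c = rng.normal(size=768)               # embedding of the erased concept

# argmin_e ||W_edit e - W c||^2, solved in closed form by least squares
target = W @ c
e_new, *_ = np.linalg.lstsq(W_edit, target, rcond=None)
# e_new regenerates the concept's output and would be aligned with a
# harmless concept in the next iteration of the erase-derive loop
```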
arXiv:2407.05869 [pdf, other] cs.AI
PORCA: Root Cause Analysis with Partially Observed Data
Authors: Chang Gong, Di Yao, Jin Wang, Wenbin Li, Lanting Fang, Yongtao Xie, Kaiyu Feng, Peng Han, Jingping Bi
Abstract: Root Cause Analysis (RCA) aims at identifying the underlying causes of system faults by uncovering and analyzing the causal structure of complex systems. It has been widely used in many application domains. Reliable diagnostic conclusions are of great importance in mitigating system failures and financial losses. However, previous studies implicitly assume a full observation of the system, neglecting the effect of partial observation (i.e., missing nodes and latent malfunction). As a result, they fail to derive reliable RCA results. In this paper, we unveil the issues of unobserved confounders and heterogeneity under partial observation and formulate a new problem of root cause analysis with partially observed data. To achieve this, we propose PORCA, a novel RCA framework that can explore reliable root causes under both unobserved confounders and unobserved heterogeneity. PORCA leverages magnified score-based causal discovery to efficiently optimize an acyclic directed mixed graph under unobserved confounders. In addition, we develop a heterogeneity-aware scheduling strategy to provide adaptive sample weights. Extensive experimental results on one synthetic and two real-world datasets demonstrate the effectiveness and superiority of the proposed framework.
Submitted 11 July, 2024; v1 submitted 8 July, 2024; originally announced July 2024.
arXiv:2407.04029 [pdf, other] cs.LG
Robust Learning under Hybrid Noise
Authors: Yang Wei, Shuo Chen, Shanshan Ye, Bo Han, Chen Gong
Abstract: Feature noise and label noise are ubiquitous in practical scenarios, which pose great challenges for training a robust machine learning model. Most previous approaches deal with only a single problem of either feature noise or label noise. However, in real-world applications, hybrid noise, which contains both feature noise and label noise, is very common due to unreliable data collection and annotation processes. Although some results have been achieved by a few representation-learning-based attempts, this issue is still far from being addressed with promising performance and guaranteed theoretical analyses. To address the challenge, we propose a novel unified learning framework called "Feature and Label Recovery" (FLR) to combat hybrid noise from the perspective of data recovery, where we concurrently reconstruct both the feature matrix and the label matrix of the input data. Specifically, the clean feature matrix is discovered by low-rank approximation, and the ground-truth label matrix is embedded based on the recovered features with a nuclear norm regularization. Meanwhile, the feature noise and label noise are characterized by their respective adaptive matrix norms to satisfy the corresponding maximum likelihood. As this framework leads to a non-convex optimization problem, we develop the non-convex Alternating Direction Method of Multipliers (ADMM) with a convergence guarantee to solve our learning objective. We also provide theoretical analysis to show that the generalization error of FLR can be upper-bounded in the presence of hybrid noise. Experimental results on several typical benchmark datasets clearly demonstrate the superiority of our proposed method over state-of-the-art robust learning approaches for various noises.
Submitted 4 July, 2024; originally announced July 2024.
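Low-rank feature recovery of this kind typically relies on the proximal operator of the nuclear norm, i.e., singular-value soft-thresholding inside ADMM. A minimal sketch of that single step; the threshold tau and the synthetic data are illustrative, and the full method alternates this with label recovery.

```python
import numpy as np

def svt(M, tau):
    """Singular-value thresholding: prox of the nuclear norm at level tau."""
    U, s, Vt = np.linalg.svd(M, full_matrices=False)
    return U @ np.diag(np.maximum(s - tau, 0)) @ Vt

rng = np.random.default_rng(1)
clean = rng.normal(size=(100, 5)) @ rng.normal(size=(5, 50))  # rank-5 features
noisy = clean + 0.3 * rng.normal(size=clean.shape)            # add feature noise
recovered = svt(noisy, tau=5.0)
print(np.linalg.matrix_rank(recovered, tol=1e-6))             # low rank restored
```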
arXiv:2406.19065 [pdf, other] cs.CL
STBench: Assessing the Ability of Large Language Models in Spatio-Temporal Analysis
Authors: Wenbin Li, Di Yao, Ruibo Zhao, Wenjie Chen, Zijie Xu, Chengxue Luo, Chang Gong, Quanliang Jing, Haining Tan, Jingping Bi
Abstract: The rapid evolution of large language models (LLMs) holds promise for reforming the methodology of spatio-temporal data mining. However, current works for evaluating the spatio-temporal understanding capability of LLMs are somewhat limited and biased. These works either fail to incorporate the latest language models or only focus on assessing memorized spatio-temporal knowledge. To address this gap, this paper dissects LLMs' capability on spatio-temporal data into four distinct dimensions: knowledge comprehension, spatio-temporal reasoning, accurate computation, and downstream applications. We curate several natural language question-answer tasks for each category and build the benchmark dataset, namely STBench, containing 13 distinct tasks and over 60,000 QA pairs. Moreover, we have assessed the capabilities of 13 LLMs, such as GPT-4o, Gemma and Mistral. Experimental results reveal that existing LLMs show remarkable performance on knowledge comprehension and spatio-temporal reasoning tasks, with potential for further enhancement on other tasks through in-context learning, chain-of-thought prompting, and fine-tuning.
The code and datasets of STBench are released at https://github.com/LwbXc/STBench.
Submitted 27 June, 2024; originally announced June 2024.

arXiv:2406.18924 [pdf, other] cs.AI cs.LG cs.RO
Learning Pareto Set for Multi-Objective Continuous Robot Control
Authors: Tianye Shu, Ke Shang, Cheng Gong, Yang Nan, Hisao Ishibuchi
Abstract: For a control problem with multiple conflicting objectives, there exists a set of Pareto-optimal policies called the Pareto set instead of a single optimal policy. When a multi-objective control problem is continuous and complex, traditional multi-objective reinforcement learning (MORL) algorithms search for many Pareto-optimal deep policies to approximate the Pareto set, which is quite resource-consuming. In this paper, we propose a simple and resource-efficient MORL algorithm that learns a continuous representation of the Pareto set in a high-dimensional policy parameter space using a single hypernet. The learned hypernet can directly generate various well-trained policy networks for different user preferences.
We compare our method with two state-of-the-art MORL algorithms on seven multi-objective continuous robot control problems. Experimental results show that our method achieves the best overall performance with the fewest training parameters. An interesting observation is that the Pareto set is well approximated by a curved line or surface in a high-dimensional parameter space. This observation provides insight for researchers designing new MORL algorithms.
Submitted 27 June, 2024; originally announced June 2024.
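The single-hypernet idea maps a user preference vector on the objective simplex to the full parameter vector of a policy network. A hedged sketch with illustrative sizes and a two-layer policy; this is not the paper's architecture.

```python
import torch
import torch.nn as nn

obs_dim, act_dim, hidden = 17, 6, 64
policy_numel = obs_dim * hidden + hidden + hidden * act_dim + act_dim

hypernet = nn.Sequential(nn.Linear(2, 256), nn.ReLU(),
                         nn.Linear(256, policy_numel))

pref = torch.tensor([0.7, 0.3])            # trade-off between two objectives
flat = hypernet(pref)                      # all policy weights at once

# unpack the flat vector into the two-layer policy
i = 0
def take(n, shape):
    global i
    out = flat[i:i + n].view(shape); i += n
    return out

W1 = take(obs_dim * hidden, (hidden, obs_dim)); b1 = take(hidden, (hidden,))
W2 = take(hidden * act_dim, (act_dim, hidden)); b2 = take(act_dim, (act_dim,))

obs = torch.randn(obs_dim)
action = torch.tanh(W2 @ torch.relu(W1 @ obs + b1) + b2)  # preference-specific policy
```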
arXiv:2406.16728 [pdf, other] cs.AI
doi: 10.1145/3616855.3635766
CausalMMM: Learning Causal Structure for Marketing Mix Modeling
Authors: Chang Gong, Di Yao, Lei Zhang, Sheng Chen, Wenbin Li, Yueyang Su, Jingping Bi
Abstract: In online advertising, marketing mix modeling (MMM) is employed to predict the gross merchandise volume (GMV) of brand shops and help decision-makers adjust the budget allocation across various advertising channels. Traditional MMM methods leveraging regression techniques can fail to handle the complexity of marketing. Although some efforts try to encode causal structures for better prediction, they impose the strict restriction that causal structures are known a priori and unchangeable. In this paper, we define a new causal MMM problem that automatically discovers interpretable causal structures from data and yields better GMV predictions. To achieve causal MMM, two essential challenges must be addressed: (1) Causal Heterogeneity: the causal structures of different kinds of shops vary a lot. (2) Marketing Response Patterns: various marketing response patterns, i.e., carryover effects and shape effects, have been validated in practice. We argue that causal MMM needs to dynamically discover specific causal structures for different shops, and that predictions should comply with the prior-known marketing response patterns. Thus, we propose CausalMMM, which integrates Granger causality into a variational inference framework to measure the causal relationships between different channels and predict GMV with the regularization of both temporal and saturation marketing response patterns. Extensive experiments show that CausalMMM not only achieves superior performance in causal structure learning on synthetic datasets, with improvements of 5.7% to 7.1%, but also enhances GMV prediction results on a representative e-commerce platform.
Submitted 24 June, 2024; originally announced June 2024.
Comments: WSDM 2024, full version
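Granger-style structure learning can be sketched as a trainable soft adjacency that gates lagged channel effects, with a sparsity penalty yielding an interpretable graph. Everything below (lags, penalty weight, shapes) is an illustrative stand-in for the paper's variational formulation.

```python
import torch
import torch.nn as nn

T, C, lag = 52, 4, 3                       # weeks, ad channels, max lag
spend = torch.rand(T, C)                   # weekly spend per channel
gmv = torch.rand(T)                        # weekly GMV to explain

A = nn.Parameter(torch.zeros(C))           # soft channel->GMV adjacency logits
beta = nn.Parameter(torch.zeros(lag, C))   # lagged (carryover) coefficients

opt = torch.optim.Adam([A, beta], lr=0.05)
for _ in range(200):
    gate = torch.sigmoid(A)                # which causal edges exist
    pred = sum((spend[lag - k - 1:T - k - 1] * beta[k] * gate).sum(-1)
               for k in range(lag))
    loss = ((pred - gmv[lag:]) ** 2).mean() + 0.01 * gate.sum()  # fit + sparsity
    opt.zero_grad(); loss.backward(); opt.step()

print(torch.sigmoid(A))                    # edge probability per channel
```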
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">WSDM 2024, full version</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.15877">arXiv:2406.15877</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.15877">pdf</a>, <a href="https://arxiv.org/format/2406.15877">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> BigCodeBench: Benchmarking Code Generation with Diverse Function Calls and Complex Instructions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhuo%2C+T+Y">Terry Yue Zhuo</a>, <a href="/search/cs?searchtype=author&amp;query=Vu%2C+M+C">Minh Chien Vu</a>, <a href="/search/cs?searchtype=author&amp;query=Chim%2C+J">Jenny Chim</a>, <a href="/search/cs?searchtype=author&amp;query=Hu%2C+H">Han Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Yu%2C+W">Wenhao Yu</a>, <a href="/search/cs?searchtype=author&amp;query=Widyasari%2C+R">Ratnadira Widyasari</a>, <a href="/search/cs?searchtype=author&amp;query=Yusuf%2C+I+N+B">Imam Nur Bani Yusuf</a>, <a href="/search/cs?searchtype=author&amp;query=Zhan%2C+H">Haolan Zhan</a>, <a href="/search/cs?searchtype=author&amp;query=He%2C+J">Junda He</a>, <a href="/search/cs?searchtype=author&amp;query=Paul%2C+I">Indraneil Paul</a>, <a href="/search/cs?searchtype=author&amp;query=Brunner%2C+S">Simon Brunner</a>, <a href="/search/cs?searchtype=author&amp;query=Gong%2C+C">Chen Gong</a>, <a href="/search/cs?searchtype=author&amp;query=Hoang%2C+T">Thong Hoang</a>, <a href="/search/cs?searchtype=author&amp;query=Zebaze%2C+A+R">Armel Randy Zebaze</a>, <a href="/search/cs?searchtype=author&amp;query=Hong%2C+X">Xiaoheng Hong</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+W">Wen-Ding Li</a>, <a href="/search/cs?searchtype=author&amp;query=Kaddour%2C+J">Jean Kaddour</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+M">Ming Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Z">Zhihan Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Yadav%2C+P">Prateek Yadav</a>, <a href="/search/cs?searchtype=author&amp;query=Jain%2C+N">Naman Jain</a>, <a href="/search/cs?searchtype=author&amp;query=Gu%2C+A">Alex Gu</a>, <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+Z">Zhoujun Cheng</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+J">Jiawei Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Q">Qian Liu</a> , et al. (8 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.15877v3-abstract-short" style="display: inline;"> Task automation has been greatly empowered by the recent advances in Large Language Models (LLMs) via Python code, with tasks ranging from software engineering development to general-purpose reasoning.
While current benchmarks have shown that LLMs can solve tasks using programs like human developers, the majority of their evaluations are limited to short and self-contained algorithmic tasks o&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.15877v3-abstract-full').style.display = 'inline'; document.getElementById('2406.15877v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.15877v3-abstract-full" style="display: none;"> Task automation has been greatly empowered by the recent advances in Large Language Models (LLMs) via Python code, with tasks ranging from software engineering development to general-purpose reasoning. While current benchmarks have shown that LLMs can solve tasks using programs like human developers, the majority of their evaluations are limited to short and self-contained algorithmic tasks or standalone function calls. Solving challenging and practical tasks requires the capability of utilizing diverse function calls as tools to efficiently implement functionalities like data analysis and web development. In addition, using multiple tools to solve a task requires compositional reasoning and an accurate understanding of complex instructions. Fulfilling both of these characteristics can pose a great challenge for LLMs. To assess how well LLMs can solve challenging and practical tasks via programs, we introduce BigCodeBench, a benchmark that challenges LLMs to invoke multiple function calls as tools from 139 libraries and 7 domains for 1,140 fine-grained tasks. To evaluate LLMs rigorously, each task encompasses an average of 5.6 test cases with an average branch coverage of 99%. In addition, we propose a natural-language-oriented variant of BigCodeBench, BigCodeBench-Instruct, that automatically transforms the original docstrings into short instructions containing only the essential information. Our extensive evaluation of 60 LLMs shows that LLMs are not yet capable of following complex instructions to use function calls precisely, with scores of up to 60%, significantly lower than the human performance of 97%. The results underscore the need for further advancements in this area. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.15877v3-abstract-full').style.display = 'none'; document.getElementById('2406.15877v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024.
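<p>The test-case-based scoring that benchmarks of this kind rely on can be illustrated with a toy harness. Everything below is hypothetical: the task is assumed to expose a function named solve, and real harnesses such as BigCodeBench's sandbox execution and measure branch coverage rather than trusting a bare exec.</p>
<pre><code># Toy pass-rate scoring for generated code (illustrative only).
def run_candidate(candidate_src, test_cases):
    namespace = {}
    try:
        exec(candidate_src, namespace)  # real harnesses sandbox this
    except Exception:
        return 0.0
    passed = 0
    for args, expected in test_cases:
        try:
            if namespace["solve"](*args) == expected:
                passed += 1
        except Exception:
            pass
    return passed / len(test_cases)

candidate = "def solve(a, b):\n    return a + b\n"
tests = [((1, 2), 3), ((0, 0), 0), ((2, 2), 5)]
print(run_candidate(candidate, tests))  # 2 of 3 tests pass</code></pre>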
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">44 pages, 14 figures, 7 tables, built with love by the BigCode community :)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.14255">arXiv:2406.14255</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.14255">pdf</a>, <a href="https://arxiv.org/format/2406.14255">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3637528.3671579">10.1145/3637528.3671579 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> DuMapNet: An End-to-End Vectorization System for City-Scale Lane-Level Map Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Xia%2C+D">Deguo Xia</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+W">Weiming Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+X">Xiyan Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+W">Wei Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Gong%2C+C">Chenting Gong</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+J">Jizhou Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+M">Mengmeng Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+D">Diange Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.14255v1-abstract-short" style="display: inline;"> Generating city-scale lane-level maps faces significant challenges due to intricate urban environments, such as blurred or absent lane markings. Additionally, a standard lane-level map requires a comprehensive organization of lane groupings, encompassing lane direction, style, boundary, and topology, yet this organization has not been thoroughly examined in prior research. These obstacles result in labor-intens&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.14255v1-abstract-full').style.display = 'inline'; document.getElementById('2406.14255v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.14255v1-abstract-full" style="display: none;"> Generating city-scale lane-level maps faces significant challenges due to intricate urban environments, such as blurred or absent lane markings. Additionally, a standard lane-level map requires a comprehensive organization of lane groupings, encompassing lane direction, style, boundary, and topology, yet this organization has not been thoroughly examined in prior research. These obstacles result in labor-intensive human annotation and high maintenance costs. This paper overcomes these limitations and presents an industrial-grade solution named DuMapNet that outputs standardized, vectorized map elements and their topology in an end-to-end paradigm.
To this end, we propose a group-wise lane prediction (GLP) system that outputs vectorized results of lane groups by meticulously tailoring a transformer-based network. Meanwhile, to enhance generalization in challenging scenarios, such as road wear and occlusions, as well as to improve global consistency, a contextual prompts encoder (CPE) module is proposed, which leverages the predicted results of spatial neighborhoods as contextual information. Extensive experiments conducted on large-scale real-world datasets demonstrate the superiority and effectiveness of DuMapNet. Additionally, DuMapNet has been deployed in production at Baidu Maps since June 2023, supporting lane-level map generation tasks for over 360 cities while bringing a 95% reduction in costs. This demonstrates that DuMapNet serves as a practical and cost-effective industrial solution for city-scale lane-level map generation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.14255v1-abstract-full').style.display = 'none'; document.getElementById('2406.14255v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by KDD 2024, camera-ready version</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.08911">arXiv:2406.08911</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.08911">pdf</a>, <a href="https://arxiv.org/format/2406.08911">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> An Initial Investigation of Language Adaptation for TTS Systems under Low-resource Scenarios </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Gong%2C+C">Cheng Gong</a>, <a href="/search/cs?searchtype=author&amp;query=Cooper%2C+E">Erica Cooper</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+X">Xin Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Qiang%2C+C">Chunyu Qiang</a>, <a href="/search/cs?searchtype=author&amp;query=Geng%2C+M">Mengzhe Geng</a>, <a href="/search/cs?searchtype=author&amp;query=Wells%2C+D">Dan Wells</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+L">Longbiao Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Dang%2C+J">Jianwu Dang</a>, <a href="/search/cs?searchtype=author&amp;query=Tessier%2C+M">Marc Tessier</a>, <a href="/search/cs?searchtype=author&amp;query=Pine%2C+A">Aidan Pine</a>, <a href="/search/cs?searchtype=author&amp;query=Richmond%2C+K">Korin Richmond</a>, <a href="/search/cs?searchtype=author&amp;query=Yamagishi%2C+J">Junichi Yamagishi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.08911v1-abstract-short" style="display: inline;"> Self-supervised learning (SSL) representations from
massively multilingual models offer a promising solution for low-resource language speech tasks. Despite advancements, language adaptation in TTS systems remains an open problem. This paper explores the language adaptation capability of ZMM-TTS, a recent SSL-based multilingual TTS system proposed in our previous work. We conducted experiments on&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.08911v1-abstract-full').style.display = 'inline'; document.getElementById('2406.08911v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.08911v1-abstract-full" style="display: none;"> Self-supervised learning (SSL) representations from massively multilingual models offer a promising solution for low-resource language speech tasks. Despite advancements, language adaptation in TTS systems remains an open problem. This paper explores the language adaptation capability of ZMM-TTS, a recent SSL-based multilingual TTS system proposed in our previous work. We conducted experiments on 12 languages using limited data with various fine-tuning configurations. We demonstrate that the similarity in phonetics between the pre-training and target languages, as well as the language category, affects the target language&#39;s adaptation performance. Additionally, we find that the fine-tuning dataset size and number of speakers influence adaptability. Surprisingly, we also observed that using paired data for fine-tuning is not always optimal compared to audio-only data. Beyond speech intelligibility, our analysis covers speaker similarity, language identification, and predicted MOS. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.08911v1-abstract-full').style.display = 'none'; document.getElementById('2406.08911v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to Interspeech 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.18739">arXiv:2405.18739</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.18739">pdf</a>, <a href="https://arxiv.org/format/2405.18739">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> FlocOff: Data Heterogeneity Resilient Federated Learning with Communication-Efficient Edge Offloading </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Ma%2C+M">Mulei Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Gong%2C+C">Chenyu Gong</a>, <a href="/search/cs?searchtype=author&amp;query=Zeng%2C+L">Liekang Zeng</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+Y">Yang Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+L">Liantao Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.18739v1-abstract-short" style="display: inline;"> Federated Learning (FL) has emerged as a fundamental learning paradigm to harness massive data scattered at geo-distributed edge devices in a privacy-preserving way. Given the heterogeneous deployment of edge devices, however, their data are usually Non-IID, introducing significant challenges to FL including degraded training accuracy, intensive communication costs, and high computing complexity.&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.18739v1-abstract-full').style.display = 'inline'; document.getElementById('2405.18739v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.18739v1-abstract-full" style="display: none;"> Federated Learning (FL) has emerged as a fundamental learning paradigm to harness massive data scattered at geo-distributed edge devices in a privacy-preserving way. Given the heterogeneous deployment of edge devices, however, their data are usually Non-IID, introducing significant challenges to FL including degraded training accuracy, intensive communication costs, and high computing complexity. To tackle these challenges, traditional approaches typically utilize adaptive mechanisms, which may suffer from scalability issues, increased computational overhead, and limited adaptability to diverse edge environments. Instead, this paper leverages the observation that computation offloading involves inherent functionalities, such as node matching and service correlation, that can achieve data reshaping, and it proposes the Federated learning based on computing Offloading (FlocOff) framework to address data heterogeneity and resource constraints. Specifically, FlocOff formulates the FL process with Non-IID data in edge scenarios and derives a rigorous analysis of the impact of imbalanced data distribution.
Based on this, FlocOff decouples the optimization into two steps: (1) minimizing the Kullback-Leibler (KL) divergence via computation offloading scheduling (MKL-CO); and (2) minimizing the communication cost through resource allocation (MCC-RA). Extensive experimental results demonstrate that the proposed FlocOff effectively improves model convergence and accuracy by 14.3%&ndash;32.7% while reducing data heterogeneity under various data distributions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.18739v1-abstract-full').style.display = 'none'; document.getElementById('2405.18739v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.16071">arXiv:2405.16071</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.16071">pdf</a>, <a href="https://arxiv.org/format/2405.16071">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> DynRefer: Delving into Region-level Multi-modality Tasks via Dynamic Resolution </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+Y">Yuzhong Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+F">Feng Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Y">Yue Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Liao%2C+M">Mingxiang Liao</a>, <a href="/search/cs?searchtype=author&amp;query=Gong%2C+C">Chen Gong</a>, <a href="/search/cs?searchtype=author&amp;query=Ye%2C+Q">Qixiang Ye</a>, <a href="/search/cs?searchtype=author&amp;query=Wan%2C+F">Fang Wan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.16071v1-abstract-short" style="display: inline;"> Region-level multi-modality methods can translate referred image regions to human-preferred language descriptions. Unfortunately, most existing methods use fixed visual inputs and lack the resolution adaptability needed to produce precise language descriptions. In this study, we propose a dynamic resolution approach, referred to as DynRefer, to pursue high-accuracy region-level referring thro&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.16071v1-abstract-full').style.display = 'inline'; document.getElementById('2405.16071v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.16071v1-abstract-full" style="display: none;"> Region-level multi-modality methods can translate referred image regions to human-preferred language descriptions. Unfortunately, most existing methods use fixed visual inputs and lack the resolution adaptability needed to produce precise language descriptions. In this study, we propose a dynamic resolution approach, referred to as DynRefer, to pursue high-accuracy region-level referring through mimicking the resolution adaptability of human visual cognition.
DynRefer first implements stochastic vision-language alignment. It aligns desired language descriptions of multi-modality tasks with images of stochastic resolution, which are constructed by nesting a set of views around the referred region. DynRefer then implements dynamic multi-modality referring, which is realized by selecting views based on image and language priors. This allows the visual information used for referring to better match human preferences, thereby improving the representational adaptability of region-level multi-modality models. Extensive experiments show that DynRefer brings mutual improvement upon tasks including region-level captioning, open-vocabulary region recognition and attribute detection. Last but not least, DynRefer achieves new state-of-the-art on multiple region-level multi-modality tasks using a single model. Code is available at https://github.com/callsys/DynRefer. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.16071v1-abstract-full').style.display = 'none'; document.getElementById('2405.16071v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Code is available at https://github.com/callsys/DynRefer</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.10492">arXiv:2405.10492</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.10492">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Automatic News Generation and Fact-Checking System Based on Language Processing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Peng%2C+X">Xirui Peng</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+Q">Qiming Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Feng%2C+Z">Zheng Feng</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+H">Haopeng Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Tan%2C+L">Lianghao Tan</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+Y">Yan Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Z">Zecheng Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Gong%2C+C">Chenwei Gong</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+Y">Yingqiao Zheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.10492v2-abstract-short" style="display: inline;"> This paper explores an automatic news generation and fact-checking system based on language processing, aimed at enhancing the efficiency and quality of news production while ensuring the authenticity and reliability of the news content. 
With the rapid development of Natural Language Processing (NLP) and deep learning technologies, automatic news generation systems are capable of extracting key in&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.10492v2-abstract-full').style.display = 'inline'; document.getElementById('2405.10492v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.10492v2-abstract-full" style="display: none;"> This paper explores an automatic news generation and fact-checking system based on language processing, aimed at enhancing the efficiency and quality of news production while ensuring the authenticity and reliability of the news content. With the rapid development of Natural Language Processing (NLP) and deep learning technologies, automatic news generation systems are capable of extracting key information from massive data and generating well-structured, fluent news articles. Meanwhile, by integrating fact-checking technology, the system can effectively prevent the spread of false news and improve the accuracy and credibility of news. This study details the key technologies involved in automatic news generation and fact-checking, including text generation, information extraction, and the application of knowledge graphs, and validates the effectiveness of these technologies through experiments. Additionally, the paper discusses the future development directions of automatic news generation and fact-checking systems, emphasizing the importance of further integration and innovation of technologies. The results show that with continuous technological optimization and practical application, these systems will play an increasingly important role in the future news industry, providing more efficient and reliable news services. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.10492v2-abstract-full').style.display = 'none'; document.getElementById('2405.10492v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024.
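<p>At a high level, the pipeline this abstract describes chains information extraction, text generation, and fact-checking against structured knowledge. The skeleton below is a hypothetical toy: the two placeholder functions stand in for the NLP models, and the dictionary stands in for a knowledge graph.</p>
<pre><code># Hypothetical news-generation pipeline with a fact-check gate.
KNOWLEDGE_BASE = {("ACME Corp", "founded"): "1999"}

def extract_facts(source_text):
    # Placeholder for an information-extraction model.
    return [("ACME Corp", "founded", "1999")]

def generate_article(facts):
    # Placeholder for a text-generation model.
    return " ".join(f"{s} was {p} in {o}." for s, p, o in facts)

def fact_check(facts):
    # Verify each claim against the knowledge base before publishing.
    return all(KNOWLEDGE_BASE.get((s, p)) == o for s, p, o in facts)

facts = extract_facts("ACME Corp, founded in 1999, announced ...")
article = generate_article(facts)
print(article if fact_check(facts) else "withheld: failed fact-check")</code></pre>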
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.5; H.4 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.10175">arXiv:2405.10175</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.10175">pdf</a>, <a href="https://arxiv.org/format/2405.10175">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Filling Missing Values Matters for Range Image-Based Point Cloud Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chen%2C+B">Bike Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Gong%2C+C">Chen Gong</a>, <a href="/search/cs?searchtype=author&amp;query=R%C3%B6ning%2C+J">Juha Röning</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.10175v2-abstract-short" style="display: inline;"> Point cloud segmentation (PCS) plays an essential role in robot perception and navigation tasks. To efficiently understand large-scale outdoor point clouds, their range image representation is commonly adopted. This image-like representation is compact and structured, making range image-based PCS models practical. However, undesirable missing values in the range images damage the shapes and patter&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.10175v2-abstract-full').style.display = 'inline'; document.getElementById('2405.10175v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.10175v2-abstract-full" style="display: none;"> Point cloud segmentation (PCS) plays an essential role in robot perception and navigation tasks. To efficiently understand large-scale outdoor point clouds, their range image representation is commonly adopted. This image-like representation is compact and structured, making range image-based PCS models practical. However, undesirable missing values in the range images damage the shapes and patterns of objects. This problem creates difficulty for the models in learning coherent and complete geometric information from the objects. Consequently, PCS models achieve only inferior performance. Delving into this issue, we find that unreasonable projection approaches and the deskewing of scans are the main causes of the unwanted missing values in the range images. Moreover, almost all previous works fail to consider filling in these missing values in the PCS task. To alleviate this problem, we first propose a new projection method, namely scan unfolding++ (SU++), to avoid massive missing values in the generated range images. Then, we introduce a simple yet effective approach, namely range-dependent $K$-nearest neighbor interpolation ($K$NNI), to further fill in missing values. Finally, we introduce the Filling Missing Values Network (FMVNet) and Fast FMVNet.
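<p>As a rough illustration of the interpolation step just introduced (before the abstract's experimental results), the sketch below fills each invalid range-image pixel from valid neighbors in a small window. It is not the paper's implementation: the actual $K$NNI makes the neighborhood range-dependent, and the window size, k, and invalid marker here are assumed values.</p>
<pre><code>import numpy as np

def knn_fill(range_img, k=4, invalid=0.0):
    # Fill each invalid pixel with the mean of up to k valid
    # neighbors in a 3x3 window (simplified stand-in for KNNI).
    h, w = range_img.shape
    out = range_img.copy()
    for i in range(h):
        for j in range(w):
            if range_img[i, j] == invalid:
                patch = range_img[max(i - 1, 0):i + 2, max(j - 1, 0):j + 2]
                valid = patch[patch != invalid]
                if valid.size > 0:
                    out[i, j] = valid[:k].mean()
    return out

img = np.array([[5.0, 0.0, 5.2],
                [4.9, 5.1, 0.0]])
print(knn_fill(img))</code></pre>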
Extensive experimental results on SemanticKITTI, SemanticPOSS, and nuScenes datasets demonstrate that by employing the proposed SU++ and $K$NNI, existing range image-based PCS models consistently achieve better performance than the baseline models. Besides, both FMVNet and Fast FMVNet achieve state-of-the-art performance in terms of the speed-accuracy trade-off. The proposed methods can be applied to other range image-based tasks and practical applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.10175v2-abstract-full').style.display = 'none'; document.getElementById('2405.10175v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">No Comments</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.04513">arXiv:2405.04513</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.04513">pdf</a>, <a href="https://arxiv.org/format/2405.04513">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Switchable Decision: Dynamic Neural Generation Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+S">Shujian Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Tanwisuth%2C+K">Korawat Tanwisuth</a>, <a href="/search/cs?searchtype=author&amp;query=Gong%2C+C">Chengyue Gong</a>, <a href="/search/cs?searchtype=author&amp;query=He%2C+P">Pengcheng He</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+M">Mingyuan Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.04513v1-abstract-short" style="display: inline;"> Auto-regressive generation models achieve competitive performance across many different NLP tasks such as summarization, question answering, and classification. However, they are also known for being slow in inference, which makes them challenging to deploy in real-time applications.
We propose a switchable decision to accelerate inference by dynamically assigning computation resources for each d&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.04513v1-abstract-full').style.display = 'inline'; document.getElementById('2405.04513v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.04513v1-abstract-full" style="display: none;"> Auto-regressive generation models achieve competitive performance across many different NLP tasks such as summarization, question answering, and classification. However, they are also known for being slow in inference, which makes them challenging to deploy in real-time applications. We propose a switchable decision to accelerate inference by dynamically assigning computation resources for each data instance. By automatically deciding where to skip and how to balance quality and computation cost via constrained optimization, our dynamic neural generation networks select an efficient inference path and determine the optimal trade-off. Experiments across question answering, summarization, and classification benchmarks show that our method reduces computation cost during inference while keeping the same accuracy. Extensive experiments and ablation studies demonstrate that our method can be general, effective, and beneficial for many NLP tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.04513v1-abstract-full').style.display = 'none'; document.getElementById('2405.04513v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024.
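<p>The per-instance skipping idea can be pictured as a cheap gate in front of each block. The toy below is only a sketch: the gating criterion and threshold are invented for illustration, whereas the paper learns where to skip via constrained optimization.</p>
<pre><code>import numpy as np

rng = np.random.default_rng(0)
layers = [rng.normal(size=(8, 8)) * 0.1 for _ in range(6)]

def gate(h, threshold=0.5):
    # Toy per-instance decision: run the block only when the
    # hidden state is still "active" (invented criterion).
    return float(np.linalg.norm(h)) > threshold

def forward(x):
    h, used = x, 0
    for W in layers:
        if gate(h):
            h = np.tanh(h @ W) + h  # residual block
            used += 1
        # else: skip this block for this input
    return h, used

out, blocks_used = forward(rng.normal(size=8))
print(f"ran {blocks_used} of {len(layers)} blocks")</code></pre>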
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to ICML 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.17820">arXiv:2404.17820</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2404.17820">pdf</a>, <a href="https://arxiv.org/format/2404.17820">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1002/rob.22345">10.1002/rob.22345 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Motion planning for off-road autonomous driving based on human-like cognition and weight adaptation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Yuchun Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Gong%2C+C">Cheng Gong</a>, <a href="/search/cs?searchtype=author&amp;query=Gong%2C+J">Jianwei Gong</a>, <a href="/search/cs?searchtype=author&amp;query=Jia%2C+P">Peng Jia</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.17820v1-abstract-short" style="display: inline;"> Driving in an off-road environment is challenging for autonomous vehicles due to the complex and varied terrain. To ensure stable and efficient travel, the vehicle must consider and balance environmental factors, such as undulations, roughness, and obstacles, to generate optimal trajectories that adapt to changing scenarios. However, traditional motion planners often utilize a fi&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.17820v1-abstract-full').style.display = 'inline'; document.getElementById('2404.17820v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.17820v1-abstract-full" style="display: none;"> Driving in an off-road environment is challenging for autonomous vehicles due to the complex and varied terrain. To ensure stable and efficient travel, the vehicle must consider and balance environmental factors, such as undulations, roughness, and obstacles, to generate optimal trajectories that adapt to changing scenarios. However, traditional motion planners often utilize a fixed cost function for trajectory optimization, making it difficult to adapt to different driving strategies in challenging irregular terrains and uncommon scenarios. To address these issues, we propose an adaptive motion planner based on human-like cognition and cost evaluation for off-road driving. First, we construct a multi-layer map describing different features of off-road terrains, including terrain elevation, roughness, obstacle, and artificial potential field layers.
Subsequently, we employ a CNN-LSTM network to learn the trajectories planned by human drivers in various off-road scenarios. Then, based on the human-like trajectories generated in different environments, we design a primitive-based trajectory planner that aims to mimic human trajectories and cost weight selection, generating trajectories that are consistent with the dynamics of off-road vehicles. Finally, we compute optimal cost weights and select and extend behavioral primitives to generate highly adaptive, stable, and efficient trajectories. We validate the effectiveness of the proposed method through experiments in a desert off-road environment with complex terrain and varying road conditions. The experimental results show that the proposed human-like motion planner has excellent adaptability to different off-road conditions. It shows real-time operation, greater stability, and more human-like planning ability in diverse and challenging scenarios. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.17820v1-abstract-full').style.display = 'none'; document.getElementById('2404.17820v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Journal of Field Robotics, 2024, 1-22 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.17198">arXiv:2404.17198</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2404.17198">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TVT.2024.3382309">10.1109/TVT.2024.3382309 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Beyond Imitation: A Life-long Policy Learning Framework for Path Tracking Control of Autonomous Driving </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Gong%2C+C">C. Gong</a>, <a href="/search/cs?searchtype=author&amp;query=Lu%2C+C">C. Lu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Z">Z. Li</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Z">Z. Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Gong%2C+J">J. Gong</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+X">X. Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.17198v1-abstract-short" style="display: inline;"> Model-free learning-based control methods have recently shown significant advantages over traditional control methods in avoiding complex vehicle characteristic estimation and parameter tuning. As a primary policy learning method, imitation learning (IL) is capable of learning control policies directly from expert demonstrations.
However, the performance of IL policies is highly dependent on the d&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.17198v1-abstract-full').style.display = 'inline'; document.getElementById('2404.17198v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.17198v1-abstract-full" style="display: none;"> Model-free learning-based control methods have recently shown significant advantages over traditional control methods in avoiding complex vehicle characteristic estimation and parameter tuning. As a primary policy learning method, imitation learning (IL) is capable of learning control policies directly from expert demonstrations. However, the performance of IL policies is highly dependent on the data sufficiency and quality of the demonstrations. To alleviate the above problems of IL-based policies, a lifelong policy learning (LLPL) framework is proposed in this paper, which extends the IL scheme with lifelong learning (LLL). First, a novel IL-based model-free control policy learning method for path tracking is introduced. Even with imperfect demonstrations, the optimal control policy can be learned directly from historical driving data. Second, by using the LLL method, the pre-trained IL policy can be safely updated and fine-tuned with incremental execution knowledge. Third, a knowledge evaluation method for policy learning is introduced to avoid learning redundant or inferior knowledge, thus ensuring the performance improvement of online policy learning. Experiments are conducted using a high-fidelity vehicle dynamics model in various scenarios to evaluate the performance of the proposed method. The results show that the proposed LLPL framework can continuously improve the policy performance with collected incremental driving data, and achieves the best accuracy and control smoothness compared to other baseline methods after evolving on a 7 km curved road. Through learning and evaluation with noisy real-life data collected in an off-road environment, the proposed LLPL framework also demonstrates its applicability in learning and evolving in real-life scenarios. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.17198v1-abstract-full').style.display = 'none'; document.getElementById('2404.17198v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024.
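<p>The knowledge-evaluation step described above amounts to an accept/reject gate on incremental updates. The toy below makes that concrete with a proportional path-tracking controller standing in for the policy; the controller, episodes, and scoring are assumptions, not the paper's method.</p>
<pre><code>import numpy as np

rng = np.random.default_rng(2)

def evaluate(gains, episodes):
    # Toy tracking score: negative mean squared error of a
    # proportional controller following each reference path.
    errs = []
    for ref in episodes:
        pos, e2 = 0.0, 0.0
        for r in ref:
            pos += gains[0] * (r - pos)
            e2 += (r - pos) ** 2
        errs.append(e2 / len(ref))
    return -float(np.mean(errs))

episodes = [rng.random(50).cumsum() * 0.1 for _ in range(3)]
policy = np.array([0.2])
candidate = policy + 0.1  # incremental knowledge: an adjusted gain

# Knowledge evaluation: keep the update only if it helps.
if evaluate(candidate, episodes) > evaluate(policy, episodes):
    policy = candidate
print("gain after evaluation-gated update:", policy[0])</code></pre>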
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE Transactions on Vehicular Technology, 2024, Pages 1-14 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.12530">arXiv:2404.12530</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2404.12530">pdf</a>, <a href="https://arxiv.org/format/2404.12530">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> TrajDeleter: Enabling Trajectory Forgetting in Offline Reinforcement Learning Agents </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Gong%2C+C">Chen Gong</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+K">Kecen Li</a>, <a href="/search/cs?searchtype=author&amp;query=Yao%2C+J">Jin Yao</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+T">Tianhao Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.12530v2-abstract-short" style="display: inline;"> Reinforcement learning (RL) trains an agent from experiences interacting with the environment. In scenarios where online interactions are impractical, offline RL, which trains the agent using pre-collected datasets, has become popular. While this new paradigm presents remarkable effectiveness across various real-world domains, like healthcare and energy management, there is a growing demand to ena&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.12530v2-abstract-full').style.display = 'inline'; document.getElementById('2404.12530v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.12530v2-abstract-full" style="display: none;"> Reinforcement learning (RL) trains an agent from experiences interacting with the environment. In scenarios where online interactions are impractical, offline RL, which trains the agent using pre-collected datasets, has become popular. While this new paradigm presents remarkable effectiveness across various real-world domains, like healthcare and energy management, there is a growing demand to enable agents to rapidly and completely eliminate the influence of specific trajectories from both the training dataset and the trained agents. To meet this demand, this paper advocates Trajdeleter, the first practical approach to trajectory unlearning for offline RL agents. The key idea of Trajdeleter is to guide the agent to demonstrate deteriorating performance when it encounters states associated with unlearning trajectories. Simultaneously, it ensures the agent maintains its original performance level when facing other remaining trajectories. Additionally, we introduce Trajauditor, a simple yet efficient method to evaluate whether Trajdeleter successfully eliminates the influence of the specified trajectories from the offline RL agent. Extensive experiments conducted on six offline RL algorithms and three tasks demonstrate that Trajdeleter requires only about 1.5% of the time needed for retraining from scratch.
It effectively unlearns an average of 94.8% of the targeted trajectories yet still performs well in actual environment interactions after unlearning. The replication package and agent parameters are available online. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.12530v2-abstract-full').style.display = 'none'; document.getElementById('2404.12530v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at NDSS 2025. The presented document here is the full version of our paper</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.10464">arXiv:2404.10464</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2404.10464">pdf</a>, <a href="https://arxiv.org/format/2404.10464">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> DESTEIN: Navigating Detoxification of Language Models via Universal Steering Pairs and Head-wise Activation Fusion </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+Y">Yu Li</a>, <a href="/search/cs?searchtype=author&amp;query=Jiang%2C+H">Han Jiang</a>, <a href="/search/cs?searchtype=author&amp;query=Gong%2C+C">Chuanyang Gong</a>, <a href="/search/cs?searchtype=author&amp;query=Wei%2C+Z">Zhihua Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.10464v3-abstract-short" style="display: inline;"> Despite the remarkable achievements of language models (LMs) across a broad spectrum of tasks, their propensity for generating toxic outputs remains a prevalent concern. Current solutions involving finetuning or auxiliary models usually require extensive computational resources, hindering their practicality in large language models (LLMs). In this paper, we propose DeStein, a novel method that det&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.10464v3-abstract-full').style.display = 'inline'; document.getElementById('2404.10464v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.10464v3-abstract-full" style="display: none;"> Despite the remarkable achievements of language models (LMs) across a broad spectrum of tasks, their propensity for generating toxic outputs remains a prevalent concern. Current solutions involving finetuning or auxiliary models usually require extensive computational resources, hindering their practicality in large language models (LLMs). 
In this paper, we propose DeStein, a novel method that detoxifies LMs by applying representation engineering in activation spaces with lower resource and time costs. Specifically, we derive detoxification vectors from self-induced, universal steering pairs through arithmetic operations in activation spaces. During inference, detoxification is achieved by fusing the detoxification vectors with the original representations in a head-wise manner. Empirical results demonstrate that our method significantly outperforms previous state-of-the-art approaches on various metrics, while also maintaining satisfactory generation quality and diversity. We further validate the practicality and scalability of DeStein with a series of white-box LLMs. The method is open-sourced at https://github.com/LizLizLi/DeStein. Warning: Some example model outputs may contain highly offensive or disturbing text. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.10464v3-abstract-full').style.display = 'none'; document.getElementById('2404.10464v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.01892">arXiv:2404.01892</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2404.01892">pdf</a>, <a href="https://arxiv.org/format/2404.01892">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Minimize Quantization Output Error with Bias Compensation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Gong%2C+C">Cheng Gong</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+H">Haoshuai Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Hu%2C+M">Mengting Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Lin%2C+Z">Zheng Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Fan%2C+D">Deng-Ping Fan</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Y">Yuzhi Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+T">Tao Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.01892v1-abstract-short" style="display: inline;"> Quantization is a promising method that reduces memory usage and computational intensity of Deep Neural Networks (DNNs), but it often leads to significant output error that hinders model deployment. In this paper, we propose Bias Compensation (BC) to minimize the output error, thus realizing ultra-low-precision quantization without model fine-tuning.
Instead of optimizing the non-convex quantizatio&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.01892v1-abstract-full').style.display = 'inline'; document.getElementById('2404.01892v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.01892v1-abstract-full" style="display: none;"> Quantization is a promising method that reduces memory usage and computational intensity of Deep Neural Networks (DNNs), but it often leads to significant output error that hinders model deployment. In this paper, we propose Bias Compensation (BC) to minimize the output error, thus realizing ultra-low-precision quantization without model fine-tuning. Instead of optimizing the non-convex quantization process as in most previous methods, the proposed BC bypasses that step and directly minimizes the quantization output error by identifying a bias vector for compensation. We establish that minimizing the output error through BC is a convex problem, and we provide an efficient strategy for obtaining optimal solutions with minimal output error, without the need for training or fine-tuning. We conduct extensive experiments on Vision Transformer models and Large Language Models, and the results show that our method notably reduces quantization output error, thereby permitting ultra-low-precision post-training quantization and enhancing the task performance of models. Especially, BC improves the accuracy of ViT-B with 4-bit PTQ4ViT by 36.89% on the ImageNet-1k task, and decreases the perplexity of OPT-350M with 3-bit GPTQ by 5.97 on WikiText2. The code is available at https://github.com/GongCheng1919/bias-compensation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.01892v1-abstract-full').style.display = 'none'; document.getElementById('2404.01892v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024.
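<p>For squared error, the convex minimization this abstract describes has a closed form: the bias vector that minimizes the mean squared output error over calibration data is the mean difference between full-precision and quantized outputs. The sketch below demonstrates this with a toy uniform quantizer; the quantizer and tensor shapes are assumptions, and BC itself is agnostic to the quantization scheme.</p>
<pre><code>import numpy as np

rng = np.random.default_rng(3)

def quantize(w, bits=3):
    # Simple symmetric uniform quantizer (stand-in only).
    scale = np.abs(w).max() / (2 ** (bits - 1) - 1)
    return np.round(w / scale) * scale

X = rng.normal(size=(1024, 64))   # calibration activations
W = rng.normal(size=(64, 16))     # full-precision weights
Wq = quantize(W)

# Minimizing E||XW - (XWq + b)||^2 over b is convex; the optimum
# is the mean output residual on the calibration data.
b = (X @ W - X @ Wq).mean(axis=0)

err_before = np.mean((X @ W - X @ Wq) ** 2)
err_after = np.mean((X @ W - (X @ Wq + b)) ** 2)
print(err_before, err_after)  # err_after is never larger</code></pre>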
arXiv:2404.00901 [pdf, other] cs.CV
Slightly Shift New Classes to Remember Old Classes for Video Class-Incremental Learning
Authors: Jian Jiao, Yu Dai, Hefei Mei, Heqian Qiu, Chuanyang Gong, Shiyuan Tang, Xinpeng Hao, Hongliang Li
Abstract: Recent video class-incremental learning usually excessively pursues the accuracy of the newly seen classes and relies on memory sets to mitigate catastrophic forgetting of the old classes. However, limited storage only allows storing a few representative videos. So we propose SNRO, which slightly shifts the features of new classes to remember old classes. Specifically, SNRO consists of Examples Sparse (ES) and Early Break (EB). ES decimates at a lower sample rate to build memory sets and later uses interpolation to align those sparse frames. In this way, SNRO stores more examples under the same memory consumption and forces the model to focus on low-semantic features, which are harder to forget. EB terminates training at a small epoch, preventing the model from overstretching into the high-semantic space of the current task. Experiments on the UCF101, HMDB51, and UESTC-MMEA-CL datasets show that SNRO performs better than other approaches while consuming the same memory.
Submitted 31 March, 2024; originally announced April 2024.
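The Examples Sparse idea, storing frames at a lower sample rate and interpolating them back later, can be illustrated with plain NumPy. The stride and the choice of linear interpolation below are assumptions for illustration:

```python
import numpy as np

def sparse_store(video, stride=4):
    """Examples Sparse: keep every `stride`-th frame for the memory set."""
    return video[::stride]

def interpolate_back(sparse, stride=4):
    """Linearly interpolate stored frames back toward the original frame rate."""
    t_sparse = np.arange(len(sparse)) * stride
    t_dense = np.arange(t_sparse[-1] + 1)
    flat = sparse.reshape(len(sparse), -1)
    # Interpolate each pixel/feature independently along the time axis.
    dense = np.stack([np.interp(t_dense, t_sparse, flat[:, i])
                      for i in range(flat.shape[1])], axis=1)
    return dense.reshape((len(t_dense),) + sparse.shape[1:])

video = np.random.rand(16, 8, 8)   # 16 frames of 8x8 "features"
mem = sparse_store(video)          # 4 stored frames -> 4x more clips per budget
recon = interpolate_back(mem)
print(mem.shape, recon.shape)      # (4, 8, 8) (13, 8, 8)
```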
arXiv:2403.16995 [pdf, other] cs.CL, cs.AI, cs.LG, stat.ML
Language Rectified Flow: Advancing Diffusion Language Generation with Probabilistic Flows
Authors: Shujian Zhang, Lemeng Wu, Chengyue Gong, Xingchao Liu
Abstract: Recent works have demonstrated success in controlling sentence attributes (e.g., sentiment) and structure (e.g., syntactic structure) based on diffusion language models. A key component that drives the impressive performance in generating high-quality samples from noise is iteratively denoising for thousands of steps. While beneficial, the complexity of starting from the noise and the learning steps has limited its adoption in many real-world NLP applications. This paper proposes Language Rectified Flow, based on a reformulation of standard probabilistic flow models. Language rectified flow learns (neural) ordinary differential equation models to transport between the source distribution and the target distribution, hence providing a unified and effective solution to generative modeling and domain transfer. Starting from the source distribution, our language rectified flow yields fast simulation and effectively decreases the inference time. Experiments on three challenging fine-grained control tasks and multiple high-quality text editing tasks show that our method consistently outperforms its baselines. Extensive experiments and ablation studies demonstrate that our method is general, effective, and beneficial for many NLP tasks.
Submitted 25 March, 2024; originally announced March 2024.
Comments: Accepted to NAACL 2024.
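The transport objective behind rectified flow is compact enough to sketch: learn a velocity field that matches the straight-line displacement between source and target samples, then integrate the resulting ODE in few steps. A toy version follows; the two-layer velocity network is a stand-in, not the paper's architecture:

```python
import torch

net = torch.nn.Sequential(torch.nn.Linear(3, 64), torch.nn.ReLU(),
                          torch.nn.Linear(64, 2))

def v_model(x, t):
    # Toy velocity field on 2-D points, conditioned on the time t.
    return net(torch.cat([x, t], dim=1))

def rectified_flow_loss(x0, x1):
    """Match v(x_t, t) to the straight-line velocity x1 - x0.

    x0: source samples (e.g. noise); x1: target/data samples. The learned
    ODE dx/dt = v(x, t) then transports source to target with few steps.
    """
    t = torch.rand(x0.size(0), 1)
    xt = t * x1 + (1 - t) * x0   # point on the straight interpolation path
    return ((v_model(xt, t) - (x1 - x0)) ** 2).mean()

x0, x1 = torch.randn(128, 2), torch.randn(128, 2) + 3.0
print(rectified_flow_loss(x0, x1).item())
```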
arXiv:2403.05100 [pdf, other] cs.CR, cs.AI, cs.CV, cs.LG
Exploring the Adversarial Frontier: Quantifying Robustness via Adversarial Hypervolume
Authors: Ping Guo, Cheng Gong, Xi Lin, Zhiyuan Yang, Qingfu Zhang
Abstract: The escalating threat of adversarial attacks on deep learning models, particularly in security-critical fields, has underscored the need for robust deep learning systems. Conventional robustness evaluations have relied on adversarial accuracy, which measures a model's performance under a specific perturbation intensity. However, this singular metric does not fully encapsulate the overall resilience of a model against varying degrees of perturbation. To address this gap, we propose a new metric termed adversarial hypervolume, which assesses the robustness of deep learning models comprehensively over a range of perturbation intensities from a multi-objective optimization standpoint. This metric allows for an in-depth comparison of defense mechanisms and recognizes the trivial improvements in robustness afforded by less potent defensive strategies. Additionally, we adopt a novel training algorithm that enhances adversarial robustness uniformly across various perturbation intensities, in contrast to methods narrowly focused on optimizing adversarial accuracy. Our extensive empirical studies validate the effectiveness of the adversarial hypervolume metric, demonstrating its ability to reveal subtle differences in robustness that adversarial accuracy overlooks. This research contributes a new measure of robustness and establishes a standard for assessing and benchmarking the resilience of current and future defensive models against adversarial threats.
Submitted 17 November, 2024; v1 submitted 8 March, 2024; originally announced March 2024.
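One plausible simplified reading of the metric is the area under the adversarial-accuracy-versus-epsilon curve, which aggregates robustness over a whole range of perturbation intensities rather than at a single point. A sketch under that assumption:

```python
import numpy as np

def adversarial_hypervolume(eps, acc):
    """Area under the adversarial-accuracy-vs-epsilon curve (trapezoid rule).

    A simplified reading of the metric: aggregate robustness over a range of
    perturbation intensities instead of reporting accuracy at one epsilon.
    """
    eps, acc = np.asarray(eps), np.asarray(acc)
    return float(np.sum((acc[1:] + acc[:-1]) / 2 * np.diff(eps)))

eps = np.linspace(0.0, 8 / 255, 9)
acc_a = [0.94, 0.90, 0.85, 0.78, 0.70, 0.61, 0.52, 0.44, 0.37]
acc_b = [0.94, 0.92, 0.83, 0.70, 0.55, 0.41, 0.30, 0.22, 0.16]
# Same clean accuracy at eps = 0, but model A dominates across the range.
print(adversarial_hypervolume(eps, acc_a) > adversarial_hypervolume(eps, acc_b))
```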
arXiv:2402.15751 [pdf, other] cs.LG, cs.AI, cs.CL
Sparse MeZO: Less Parameters for Better Performance in Zeroth-Order LLM Fine-Tuning
Authors: Yong Liu, Zirui Zhu, Chaoyu Gong, Minhao Cheng, Cho-Jui Hsieh, Yang You
Abstract: While fine-tuning large language models (LLMs) for specific tasks often yields impressive results, it comes at the cost of memory inefficiency due to back-propagation in gradient-based training. Memory-efficient Zeroth-order (MeZO) optimizers, recently proposed to address this issue, require only forward passes during training, making them more memory-friendly. However, the quality of gradient estimates in zeroth-order optimization often depends on the data dimensionality, potentially explaining why MeZO still exhibits significant performance drops compared to standard fine-tuning across various tasks. Inspired by the success of Parameter-Efficient Fine-Tuning (PEFT), this paper introduces Sparse MeZO, a novel memory-efficient zeroth-order optimization approach that applies zeroth-order updates only to a carefully chosen subset of parameters. We propose a simple yet effective parameter selection scheme that yields significant performance gains with Sparse-MeZO. Additionally, we develop a memory-optimized implementation for sparse masking, ensuring the algorithm requires only inference-level memory consumption, allowing Sparse-MeZO to fine-tune LLaMA-30b on a single A100 GPU. Experimental results illustrate that Sparse-MeZO consistently improves both performance and convergence speed over MeZO without any overhead. For example, it achieves a 9% absolute accuracy improvement and a 3.5x speedup over MeZO on the RTE task.
Submitted 24 February, 2024; originally announced February 2024.
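The underlying mechanics, a two-point zeroth-order estimate applied only to a masked parameter subset with the perturbation regenerated from a seed rather than stored, can be sketched in a few lines. The mask choice, step sizes, and toy loss are illustrative, not the paper's selection scheme:

```python
import numpy as np

def sparse_mezo_step(loss_fn, theta, mask, eps=1e-3, lr=1e-2, seed=0):
    """One zeroth-order (SPSA-style) step on a masked parameter subset.

    Two forward passes give a directional-derivative estimate; only entries
    with mask == 1 are perturbed and updated. Regenerating z from the seed
    instead of storing it keeps memory at inference level.
    """
    rng = np.random.default_rng(seed)
    z = rng.standard_normal(theta.shape) * mask
    g = (loss_fn(theta + eps * z) - loss_fn(theta - eps * z)) / (2 * eps)
    return theta - lr * g * z

theta = np.zeros(10)
mask = (np.arange(10) % 4 == 0).astype(float)   # illustrative sparse subset
loss = lambda p: np.sum((p - 1.0) ** 2)
for step in range(300):
    theta = sparse_mezo_step(loss, theta, mask, seed=step)
print(np.round(theta, 2))  # masked coordinates drift toward 1, others stay 0
```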
arXiv:2402.03667 [pdf, other] cs.CL, cs.AI
Large Language Models as an Indirect Reasoner: Contrapositive and Contradiction for Automated Reasoning
Authors: Yanfang Zhang, Yiliu Sun, Yibing Zhan, Dapeng Tao, Dacheng Tao, Chen Gong
Abstract: Recently, increasing attention has been drawn to improving the ability of Large Language Models (LLMs) to perform complex reasoning. However, previous methods, such as Chain-of-Thought and Self-Consistency, mainly follow Direct Reasoning (DR) frameworks, so they struggle with the numerous real-world tasks that can hardly be solved via DR. Therefore, to strengthen the reasoning power of LLMs, this paper proposes a novel Indirect Reasoning (IR) method that employs the logic of contrapositives and contradictions to tackle IR tasks such as factual reasoning and mathematical proof. Specifically, our methodology comprises two steps. First, we leverage the logical equivalence of the contrapositive to augment the data and rules and enhance the comprehensibility of LLMs. Second, we design a set of prompt templates that trigger LLMs to conduct IR based on proof by contradiction, which is logically equivalent to the original DR process. Our IR method is simple yet effective and can be straightforwardly integrated with existing DR methods to further boost the reasoning abilities of LLMs. Experimental results on popular LLMs, such as GPT-3.5-turbo and Gemini-pro, show that our IR method enhances the overall accuracy of factual reasoning by 27.33% and of mathematical proof by 31.43% compared with traditional DR methods. Moreover, methods combining IR and DR significantly outperform those using IR or DR alone, further demonstrating the effectiveness of our strategy.
Submitted 5 February, 2024; originally announced February 2024.
Comments: 20 pages, 13 figures, 4 tables.
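The two steps described, augmenting rules with their contrapositives and prompting for proof by contradiction, can be mocked up as plain string templating. The template wording and naive negation below are hypothetical, not the paper's prompts:

```python
def contrapositive(rule: str) -> str:
    """Rewrite 'if P then Q' as its logical equivalent 'if not Q then not P'.

    Negation here is naive string prefixing, purely for illustration.
    """
    assert rule.startswith("if ") and " then " in rule
    p, q = rule[3:].split(" then ", 1)
    return f"if it is not true that {q} then it is not true that {p}"

def contradiction_prompt(facts, rules, goal):
    """Hypothetical prompt skeleton asking an LLM for proof by contradiction."""
    augmented = rules + [contrapositive(r) for r in rules]
    return ("Facts: " + "; ".join(facts) + "\n"
            "Rules: " + "; ".join(augmented) + "\n"
            f"Assume the goal '{goal}' is false, derive a contradiction with "
            "the facts and rules above, then state whether the goal holds.")

print(contradiction_prompt(
    facts=["the ground is not wet"],
    rules=["if it rained then the ground is wet"],
    goal="it did not rain"))
```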
arXiv:2401.17910 [pdf, other] cs.CV
ControlCap: Controllable Region-level Captioning
Authors: Yuzhong Zhao, Yue Liu, Zonghao Guo, Weijia Wu, Chen Gong, Fang Wan, Qixiang Ye
Abstract: Region-level captioning is challenged by the caption degeneration issue: pre-trained multimodal models tend to predict the most frequent captions while missing the less frequent ones. In this study, we propose a controllable region-level captioning (ControlCap) approach, which introduces control words to a multimodal model to address the caption degeneration issue. Specifically, ControlCap leverages a discriminative module to generate control words within the caption space that partition it into multiple sub-spaces. The multimodal model is constrained to generate captions within a few sub-spaces containing the control words, which increases the chance of hitting less frequent captions and alleviates the caption degeneration issue. Furthermore, interactive control words can be given by either a human or an expert model, enabling captioning beyond the training caption space and enhancing the model's generalization ability. Extensive experiments on the Visual Genome and RefCOCOg datasets show that ControlCap improves the CIDEr score by 21.6 and 2.2 respectively, outperforming the state of the art by significant margins. Code is available at https://github.com/callsys/ControlCap.
Submitted 9 March, 2024; v1 submitted 31 January, 2024; originally announced January 2024.
arXiv:2401.09769 [pdf, other] cs.SI, cs.AI, cs.LG
A Survey on Learning from Graphs with Heterophily: Recent Advances and Future Directions
Authors: Chenghua Gong, Yao Cheng, Jianxiang Yu, Can Xu, Caihua Shan, Siqiang Luo, Xiang Li
Abstract: Graphs are structured data that model complex relations between real-world entities. Heterophilic graphs, where linked nodes tend to have different labels or dissimilar features, have recently attracted significant attention and found many real-world applications. Meanwhile, increasing efforts have been made to advance learning from graphs with heterophily, and various graph heterophily measures, benchmark datasets, and learning paradigms are emerging rapidly. In this survey, we comprehensively review existing works on learning from graphs with heterophily. First, we overview more than 500 publications, of which more than 340 are directly related to heterophilic graphs. After that, we survey existing metrics of graph heterophily and list recent benchmark datasets. Further, we systematically categorize existing methods based on a hierarchical taxonomy covering GNN models, learning paradigms, and practical applications. In addition, broader topics related to graph heterophily are also included. Finally, we discuss the primary challenges of existing studies and highlight promising avenues for future research.
Submitted 30 September, 2024; v1 submitted 18 January, 2024; originally announced January 2024.
Comments: 64 pages.
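As a concrete anchor for "graph heterophily measures": the standard edge-homophily ratio, which metric discussions in this area commonly build on, is just the fraction of edges joining same-label nodes:

```python
import numpy as np

def edge_homophily(edges, labels):
    """Fraction of edges joining same-label nodes; low values mean heterophily."""
    edges = np.asarray(edges)
    same = labels[edges[:, 0]] == labels[edges[:, 1]]
    return same.mean()

labels = np.array([0, 0, 1, 1, 2])
edges = [(0, 1), (0, 2), (1, 3), (2, 3), (3, 4)]
print(edge_homophily(edges, labels))  # 0.4 -> a fairly heterophilic toy graph
```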
arXiv:2401.06826 [pdf, other] cs.LG, cs.AI, cs.CV
Direct Distillation between Different Domains
Authors: Jialiang Tang, Shuo Chen, Gang Niu, Hongyuan Zhu, Joey Tianyi Zhou, Chen Gong, Masashi Sugiyama
Abstract: Knowledge Distillation (KD) aims to learn a compact student network using knowledge from a large pre-trained teacher network, where both networks are trained on data from the same distribution. However, in practical applications, the student network may be required to perform in a new scenario (i.e., the target domain), which usually exhibits significant differences from the known scenario of the teacher network (i.e., the source domain). Traditional domain adaptation techniques can be integrated with KD in a two-stage process to bridge the domain gap, but the ultimate reliability of two-stage approaches tends to be limited due to the high computational consumption and the additional errors accumulated from both stages. To solve this problem, we propose a new one-stage method dubbed "Direct Distillation between Different Domains" (4Ds). We first design a learnable adapter based on the Fourier transform to separate the domain-invariant knowledge from the domain-specific knowledge. Then, we build a fusion-activation mechanism to transfer the valuable domain-invariant knowledge to the student network, while simultaneously encouraging the adapter within the teacher network to learn the domain-specific knowledge of the target data. As a result, the teacher network can effectively transfer categorical knowledge that aligns with the target domain of the student network. Intensive experiments on various benchmark datasets demonstrate that our proposed 4Ds method successfully produces reliable student networks and outperforms state-of-the-art approaches.
Submitted 11 January, 2024; originally announced January 2024.
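A Fourier-transform adapter can be sketched as a learnable soft mask over the feature spectrum. Reading the gated branch as domain-invariant and the residual as domain-specific is an assumption about the design, not a reproduction of the paper's adapter:

```python
import torch

class FourierAdapter(torch.nn.Module):
    """Sketch of a learnable frequency-domain filter splitting features.

    A learnable gate over the FFT spectrum keeps one branch (read here as
    domain-invariant) and routes the remainder to a domain-specific branch.
    The split criterion is an illustrative assumption.
    """
    def __init__(self, h, w):
        super().__init__()
        self.gate = torch.nn.Parameter(torch.zeros(h, w // 2 + 1))

    def forward(self, x):                       # x: (batch, channels, h, w)
        freq = torch.fft.rfft2(x)               # complex spectrum
        g = torch.sigmoid(self.gate)            # per-frequency soft mask
        invariant = torch.fft.irfft2(freq * g, s=x.shape[-2:])
        specific = x - invariant
        return invariant, specific

adapter = FourierAdapter(16, 16)
inv, spec = adapter(torch.randn(2, 8, 16, 16))
print(inv.shape, spec.shape)
```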
arXiv:2401.00711 [pdf, other] cs.CV, cs.AI (DOI: 10.1109/ICASSP48485.2024.10446237)
Text2Avatar: Text to 3D Human Avatar Generation with Codebook-Driven Body Controllable Attribute
Authors: Chaoqun Gong, Yuqin Dai, Ronghui Li, Achun Bao, Jun Li, Jian Yang, Yachao Zhang, Xiu Li
Abstract: Generating 3D human models directly from text helps reduce the cost and time of character modeling. However, achieving multi-attribute controllable and realistic 3D human avatar generation is still challenging due to feature coupling and the scarcity of realistic 3D human avatar datasets. To address these issues, we propose Text2Avatar, which can generate realistic-style 3D avatars from coupled text prompts. Text2Avatar leverages a discrete codebook as an intermediate feature to establish a connection between text and avatars, enabling the disentanglement of features. Furthermore, to alleviate the scarcity of realistic-style 3D human avatar data, we utilize a pre-trained unconditional 3D human avatar generation model to obtain a large amount of 3D avatar pseudo-data, which allows Text2Avatar to achieve realistic-style generation. Experimental results demonstrate that our method can generate realistic 3D avatars from coupled textual data, which is challenging for other existing methods in this field.
Submitted 1 January, 2024; originally announced January 2024.
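The codebook intermediate can be illustrated with the generic vector-quantization step, a nearest-neighbor lookup into a learned code table; the sizes below are illustrative, and this is the generic VQ mechanism rather than the paper's full pipeline:

```python
import numpy as np

def quantize(z, codebook):
    """Map continuous features to their nearest codebook entries.

    A discrete codebook as an intermediate lets attributes be represented by
    separate code indices (sketch of the generic vector-quantization step).
    """
    d = ((z[:, None, :] - codebook[None, :, :]) ** 2).sum(-1)
    idx = d.argmin(axis=1)
    return idx, codebook[idx]

rng = np.random.default_rng(0)
codebook = rng.normal(size=(512, 64))   # 512 discrete codes of width 64
z = rng.normal(size=(4, 64))            # e.g. text-encoder features
idx, zq = quantize(z, codebook)
print(idx, zq.shape)
```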
arXiv:2312.16039 [pdf, other] cs.CV (DOI: 10.1016/j.patcog.2024.110962)
Dual-scale Enhanced and Cross-generative Consistency Learning for Semi-supervised Medical Image Segmentation
Authors: Yunqi Gu, Tao Zhou, Yizhe Zhang, Yi Zhou, Kelei He, Chen Gong, Huazhu Fu
Abstract: Medical image segmentation plays a crucial role in computer-aided diagnosis. However, existing methods heavily rely on fully supervised training, which requires a large amount of labeled data with time-consuming pixel-wise annotations. Moreover, accurately segmenting lesions poses challenges due to variations in shape, size, and location. To address these issues, we propose a novel Dual-scale Enhanced and Cross-generative consistency learning framework for semi-supervised medical image Segmentation (DEC-Seg). First, we propose a Cross-level Feature Aggregation (CFA) module that integrates cross-level adjacent layers to enhance feature representation across different resolutions. To address scale variation, we present a scale-enhanced consistency constraint, which ensures consistency between the segmentation maps generated from the same input image at different scales. This constraint helps handle variations in lesion size and improves the robustness of the model. Furthermore, we propose a cross-generative consistency scheme, in which the original and perturbed images can be reconstructed using cross-segmentation maps. This constraint allows us to mine effective feature representations and boost segmentation performance. To further exploit the scale information, we propose a Dual-scale Complementary Fusion (DCF) module that integrates features from two scale-specific decoders to help produce more accurate segmentation maps. Extensive experimental results on multiple medical segmentation tasks (polyp, skin lesion, and brain glioma) demonstrate the effectiveness of our DEC-Seg against other state-of-the-art semi-supervised segmentation approaches. The implementation code will be released at https://github.com/taozh2017/DECSeg.
Submitted 2 September, 2024; v1 submitted 26 December, 2023; originally announced December 2023.
Comments: 12 pages, 10 figures.
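A plausible form of the scale-enhanced consistency constraint: predictions for a downscaled copy of the image, upsampled back, should agree with the full-resolution prediction. A sketch with a stand-in segmenter (the loss form and scale factor are assumptions):

```python
import torch
import torch.nn.functional as F

def scale_consistency_loss(seg_model, x, scale=0.5):
    """Consistency between segmentations of the same image at two scales.

    Predictions from a downscaled input are upsampled back and matched
    against the full-resolution prediction.
    """
    p_full = seg_model(x)
    x_small = F.interpolate(x, scale_factor=scale, mode="bilinear",
                            align_corners=False)
    p_up = F.interpolate(seg_model(x_small), size=p_full.shape[-2:],
                         mode="bilinear", align_corners=False)
    return F.mse_loss(p_full.sigmoid(), p_up.sigmoid())

model = torch.nn.Conv2d(3, 1, 3, padding=1)    # stand-in segmentation model
print(scale_consistency_loss(model, torch.randn(2, 3, 64, 64)).item())
```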
arXiv:2312.15195 [pdf, other] cs.AI, cs.LG, eess.SY
Mutual Information as Intrinsic Reward of Reinforcement Learning Agents for On-demand Ride Pooling
Authors: Xianjie Zhang, Jiahao Sun, Chen Gong, Kai Wang, Yifei Cao, Hao Chen, Hao Chen, Yu Liu
Abstract: The emergence of on-demand ride pooling services allows each vehicle to serve multiple passengers at a time, thus increasing drivers' income and enabling passengers to travel at lower prices than taxi/car on-demand services (such as UberX and Lyft, where only one passenger can be assigned to a car at a time). Although on-demand ride pooling brings many benefits, it needs a well-defined matching strategy to maximize the benefits for all parties (passengers, drivers, aggregation companies, and the environment), and the regional dispatching of vehicles has a significant impact on matching and revenue. Existing algorithms often consider only revenue maximization, which makes it difficult for requests with unusual distributions to get a ride. Increasing revenue while ensuring a reasonable assignment of requests therefore poses a challenge to ride pooling (aggregation) companies. In this paper, we propose a framework for vehicle dispatching for ride pooling tasks, which splits the city into discrete dispatching regions and uses a reinforcement learning (RL) algorithm to dispatch vehicles in these regions. We also use the mutual information (MI) between the vehicle and order distributions as the intrinsic reward of the RL algorithm to improve the correlation between the two distributions, thus ensuring the possibility of getting a ride for unusually distributed requests. In experiments on a real-world taxi dataset, we demonstrate that our framework can significantly increase revenue, up to an average of 3% over the existing best on-demand ride pooling method.
Submitted 7 January, 2024; v1 submitted 23 December, 2023; originally announced December 2023.
Comments: Accepted by AAMAS 2024.
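The intrinsic reward can be sketched directly from its definition: the mutual information of a joint histogram over vehicle regions and order regions. Treating the histogram as co-occurrence counts within a dispatch window is an assumption:

```python
import numpy as np

def mutual_information(joint_counts):
    """MI between vehicle-region and order-region distributions.

    joint_counts[i, j]: co-occurrences of vehicles in region i with orders in
    region j over some window; higher MI means the two distributions are
    better aligned, which serves as the intrinsic reward.
    """
    p = joint_counts / joint_counts.sum()
    px, py = p.sum(axis=1, keepdims=True), p.sum(axis=0, keepdims=True)
    mask = p > 0
    return float((p[mask] * np.log(p[mask] / (px @ py)[mask])).sum())

counts = np.array([[30, 2, 1],
                   [3, 25, 4],
                   [2, 3, 30]])
print(mutual_information(counts))  # well aligned -> sizeable intrinsic reward
```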
arXiv:2312.14398 [pdf, other] cs.SD, eess.AS
ZMM-TTS: Zero-shot Multilingual and Multispeaker Speech Synthesis Conditioned on Self-supervised Discrete Speech Representations
Authors: Cheng Gong, Xin Wang, Erica Cooper, Dan Wells, Longbiao Wang, Jianwu Dang, Korin Richmond, Junichi Yamagishi
Abstract: Neural text-to-speech (TTS) has achieved human-like synthetic speech for single-speaker, single-language synthesis. Multilingual TTS systems are limited to resource-rich languages due to the lack of large paired text and studio-quality audio data. TTS systems are typically built using a single speaker's voice, but there is growing interest in developing systems that can synthesize voices for new speakers using only a few seconds of their speech. This paper presents ZMM-TTS, a multilingual and multispeaker framework utilizing quantized latent speech representations from a large-scale, pre-trained, self-supervised model. Our paper combines text-based and speech-based self-supervised learning models for multilingual speech synthesis. Our proposed model has zero-shot generalization ability not only for unseen speakers but also for unseen languages. We have conducted comprehensive subjective and objective evaluations through a series of experiments. Our model has proven effective in terms of speech naturalness and similarity for both seen and unseen speakers in six high-resource languages. We also tested the efficiency of our method on two hypothetically low-resource languages. The results are promising, indicating that our proposed approach can synthesize audio that is intelligible and has a high degree of similarity to the target speaker's voice, even without any training data for the new, unseen language.
Submitted 26 August, 2024; v1 submitted 21 December, 2023; originally announced December 2023.
Comments: Accepted by IEEE/ACM TASLP; 16 pages plus 1 page of bios and photos.
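"Quantized latent speech representations" are commonly obtained by clustering SSL features into discrete codes. A generic k-means sketch of that discretization step, not necessarily the paper's quantizer:

```python
import numpy as np

def kmeans_codes(features, k=64, iters=20, seed=0):
    """Discretize self-supervised speech features with plain k-means.

    Frame-level features (e.g. from a pre-trained SSL speech model) are
    mapped to cluster indices, giving discrete representations a TTS model
    can be conditioned on.
    """
    rng = np.random.default_rng(seed)
    centers = features[rng.choice(len(features), k, replace=False)]
    for _ in range(iters):
        d = ((features[:, None] - centers[None]) ** 2).sum(-1)
        assign = d.argmin(1)
        for j in range(k):
            if (assign == j).any():
                centers[j] = features[assign == j].mean(0)
    return assign, centers

feats = np.random.default_rng(1).normal(size=(2000, 32))  # toy frame features
codes, centers = kmeans_codes(feats)
print(codes[:10], centers.shape)
```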
arXiv:2312.10758 [pdf, other] cs.CV
SHaRPose: Sparse High-Resolution Representation for Human Pose Estimation
Authors: Xiaoqi An, Lin Zhao, Chen Gong, Nannan Wang, Di Wang, Jian Yang
Abstract: High-resolution representation is essential for achieving good performance in human pose estimation models. To obtain such features, existing works utilize high-resolution input images or fine-grained image tokens. However, this dense high-resolution representation brings a significant computational burden. In this paper, we address the following question: "Only sparse human keypoint locations are detected for human pose estimation, so is it really necessary to describe the whole image in a dense, high-resolution manner?" Based on dynamic transformer models, we propose a framework that only uses Sparse High-resolution Representations for human Pose estimation (SHaRPose). In detail, SHaRPose consists of two stages. At the coarse stage, the relations between image regions and keypoints are dynamically mined while a coarse estimation is generated. Then, a quality predictor is applied to decide whether the coarse estimation should be refined. At the fine stage, SHaRPose builds sparse high-resolution representations only on the regions related to the keypoints and provides refined high-precision human pose estimation. Extensive experiments demonstrate the outstanding performance of the proposed method. Specifically, compared to the state-of-the-art method ViTPose, our model SHaRPose-Base achieves 77.4 AP (+0.5 AP) on the COCO validation set and 76.7 AP (+0.5 AP) on the COCO test-dev set, and infers 1.4x faster than ViTPose-Base.
Submitted 17 December, 2023; originally announced December 2023.
Comments: Accepted to AAAI 2024.
