CINXE.COM

Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;50 of 1,383 results for author: <span class="mathjax">Li, D</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&amp;query=Li%2C+D">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Li, D"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Li%2C+D&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Li, D"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Li%2C+D&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Li%2C+D&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Li%2C+D&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Li%2C+D&amp;start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Li%2C+D&amp;start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Li%2C+D&amp;start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">&hellip;</span></li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.17194">arXiv:2411.17194</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.17194">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> The Role of Urban Designers in the Era of AIGC: An Experimental Study Based on Public Participation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Mo%2C+D">Di Mo</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+K">Keyi Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Tian%2C+Q">Qi Tian</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Dengyun Li</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+L">Liyan Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Ye%2C+J">Junyan Ye</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.17194v1-abstract-short" style="display: inline;"> This study explores the application of Artificial Intelligence Generated Content (AIGC) technology in urban planning and design, with a particular focus on its impact on placemaking and public participation. By utilizing natural language pro-cessing and image generation models such as Stable Diffusion, AIGC enables efficient transformation from textual descriptions to visual representations, advan&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.17194v1-abstract-full').style.display = 'inline'; document.getElementById('2411.17194v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.17194v1-abstract-full" style="display: none;"> This study explores the application of Artificial Intelligence Generated Content (AIGC) technology in urban planning and design, with a particular focus on its impact on placemaking and public participation. By utilizing natural language pro-cessing and image generation models such as Stable Diffusion, AIGC enables efficient transformation from textual descriptions to visual representations, advancing the visualization of urban spatial experiences. The research examines the evolving role of designers in participatory planning processes, specifically how AIGC facilitates their transition from traditional creators to collaborators and facilitators, and the implications of this shift on the effectiveness of public engagement. Through experimental evaluation, the study assesses the de-sign quality of urban pocket gardens generated under varying levels of designer involvement, analyzing the influence of de-signers on the aesthetic quality and contextual relevance of AIGC outputs. The findings reveal that designers significantly improve the quality of AIGC-generated designs by providing guidance and structural frameworks, highlighting the substantial potential of human-AI collaboration in urban design. This research offers valuable insights into future collaborative approaches between planners and AIGC technologies, aiming to integrate technological advancements with professional practice to foster sustainable urban development. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.17194v1-abstract-full').style.display = 'none'; document.getElementById('2411.17194v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 8 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.17101">arXiv:2411.17101</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.17101">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> Software Fault Localization Based on Multi-objective Feature Fusion and Deep Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Hu%2C+X">Xiaolei Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Dongcheng Li</a>, <a href="/search/cs?searchtype=author&amp;query=Wong%2C+W+E">W. Eric Wong</a>, <a href="/search/cs?searchtype=author&amp;query=Zou%2C+Y">Ya Zou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.17101v1-abstract-short" style="display: inline;"> Software fault localization remains challenging due to limited feature diversity and low precision in traditional methods. This paper proposes a novel approach that integrates multi-objective optimization with deep learning models to improve both accuracy and efficiency in fault localization (FL). By framing feature selection as a multi-objective optimization problem (MOP), we extract and fuse thr&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.17101v1-abstract-full').style.display = 'inline'; document.getElementById('2411.17101v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.17101v1-abstract-full" style="display: none;"> Software fault localization remains challenging due to limited feature diversity and low precision in traditional methods. This paper proposes a novel approach that integrates multi-objective optimization with deep learning models to improve both accuracy and efficiency in fault localization (FL). By framing feature selection as a multi-objective optimization problem (MOP), we extract and fuse three critical fault-related feature sets: spectrum-based, mutation-based, and text-based features, into a comprehensive feature fusion model. These features are then embedded within a deep learning architecture, comprising a multilayer perceptron (MLP) and gated recurrent network (GRN), which together enhance localization accuracy and generalizability. Experiments on the Defects4J benchmark dataset with 434 faults show that the proposed algorithm reduces processing time by 78.2% compared to single-objective methods. Additionally, our MLP and GRN models achieve a 94.2% improvement in localization accuracy compared to traditional FL methods, outperforming state-of-the-art deep learning-based FL method by 7.67%. Further validation using the PROMISE dataset demonstrates the generalizability of the proposed model, showing a 4.6% accuracy improvement in cross-project tests over state-of-the-art deep learning-based FL method. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.17101v1-abstract-full').style.display = 'none'; document.getElementById('2411.17101v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.16594">arXiv:2411.16594</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.16594">pdf</a>, <a href="https://arxiv.org/format/2411.16594">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> From Generation to Judgment: Opportunities and Challenges of LLM-as-a-judge </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Dawei Li</a>, <a href="/search/cs?searchtype=author&amp;query=Jiang%2C+B">Bohan Jiang</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+L">Liangjie Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Beigi%2C+A">Alimohammad Beigi</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+C">Chengshuai Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Tan%2C+Z">Zhen Tan</a>, <a href="/search/cs?searchtype=author&amp;query=Bhattacharjee%2C+A">Amrita Bhattacharjee</a>, <a href="/search/cs?searchtype=author&amp;query=Jiang%2C+Y">Yuxuan Jiang</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+C">Canyu Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+T">Tianhao Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Shu%2C+K">Kai Shu</a>, <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+L">Lu Cheng</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+H">Huan Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.16594v1-abstract-short" style="display: inline;"> Assessment and evaluation have long been critical challenges in artificial intelligence (AI) and natural language processing (NLP). However, traditional methods, whether matching-based or embedding-based, often fall short of judging subtle attributes and delivering satisfactory results. Recent advancements in Large Language Models (LLMs) inspire the &#34;LLM-as-a-judge&#34; paradigm, where LLMs are levera&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.16594v1-abstract-full').style.display = 'inline'; document.getElementById('2411.16594v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.16594v1-abstract-full" style="display: none;"> Assessment and evaluation have long been critical challenges in artificial intelligence (AI) and natural language processing (NLP). However, traditional methods, whether matching-based or embedding-based, often fall short of judging subtle attributes and delivering satisfactory results. Recent advancements in Large Language Models (LLMs) inspire the &#34;LLM-as-a-judge&#34; paradigm, where LLMs are leveraged to perform scoring, ranking, or selection across various tasks and applications. This paper provides a comprehensive survey of LLM-based judgment and assessment, offering an in-depth overview to advance this emerging field. We begin by giving detailed definitions from both input and output perspectives. Then we introduce a comprehensive taxonomy to explore LLM-as-a-judge from three dimensions: what to judge, how to judge and where to judge. Finally, we compile benchmarks for evaluating LLM-as-a-judge and highlight key challenges and promising directions, aiming to provide valuable insights and inspire future research in this promising research area. Paper list and more resources about LLM-as-a-judge can be found at \url{https://github.com/llm-as-a-judge/Awesome-LLM-as-a-judge} and \url{https://llm-as-a-judge.github.io}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.16594v1-abstract-full').style.display = 'none'; document.getElementById('2411.16594v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">32 pages, 5 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.15223">arXiv:2411.15223</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.15223">pdf</a>, <a href="https://arxiv.org/format/2411.15223">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> An accuracy improving method for advertising click through rate prediction based on enhanced xDeepFM model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Xi%2C+X">Xiaowei Xi</a>, <a href="/search/cs?searchtype=author&amp;query=Leng%2C+S">Song Leng</a>, <a href="/search/cs?searchtype=author&amp;query=Gong%2C+Y">Yuqing Gong</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Dalin Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.15223v1-abstract-short" style="display: inline;"> Advertising click-through rate (CTR) prediction aims to forecast the probability that a user will click on an advertisement in a given context, thus providing enterprises with decision support for product ranking and ad placement. However, CTR prediction faces challenges such as data sparsity and class imbalance, which adversely affect model training effectiveness. Moreover, most current CTR predi&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.15223v1-abstract-full').style.display = 'inline'; document.getElementById('2411.15223v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.15223v1-abstract-full" style="display: none;"> Advertising click-through rate (CTR) prediction aims to forecast the probability that a user will click on an advertisement in a given context, thus providing enterprises with decision support for product ranking and ad placement. However, CTR prediction faces challenges such as data sparsity and class imbalance, which adversely affect model training effectiveness. Moreover, most current CTR prediction models fail to fully explore the associations among user history, interests, and target advertisements from multiple perspectives, neglecting important information at different levels. To address these issues, this paper proposes an improved CTR prediction model based on the xDeepFM architecture. By integrating a multi-head attention mechanism, the model can simultaneously focus on different aspects of feature interactions, enhancing its ability to learn intricate patterns without significantly increasing computational complexity. Furthermore, replacing the linear model with a Factorization Machine (FM) model improves the handling of high-dimensional sparse data by flexibly capturing both first-order and second-order feature interactions. Experimental results on the Criteo dataset demonstrate that the proposed model outperforms other state-of-the-art methods, showing significant improvements in both AUC and Logloss metrics. This enhancement facilitates better mining of implicit relationships between features and improves the accuracy of advertising CTR prediction. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.15223v1-abstract-full').style.display = 'none'; document.getElementById('2411.15223v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 7 figures, 3 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.13281">arXiv:2411.13281</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.13281">pdf</a>, <a href="https://arxiv.org/format/2411.13281">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> </div> </div> <p class="title is-5 mathjax"> VideoAutoArena: An Automated Arena for Evaluating Large Multimodal Models in Video Analysis through User Simulation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Luo%2C+Z">Ziyang Luo</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+H">Haoning Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Dongxu Li</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+J">Jing Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Kankanhalli%2C+M">Mohan Kankanhalli</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Junnan Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.13281v1-abstract-short" style="display: inline;"> Large multimodal models (LMMs) with advanced video analysis capabilities have recently garnered significant attention. However, most evaluations rely on traditional methods like multiple-choice questions in benchmarks such as VideoMME and LongVideoBench, which are prone to lack the depth needed to capture the complex demands of real-world users. To address this limitation-and due to the prohibitiv&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13281v1-abstract-full').style.display = 'inline'; document.getElementById('2411.13281v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.13281v1-abstract-full" style="display: none;"> Large multimodal models (LMMs) with advanced video analysis capabilities have recently garnered significant attention. However, most evaluations rely on traditional methods like multiple-choice questions in benchmarks such as VideoMME and LongVideoBench, which are prone to lack the depth needed to capture the complex demands of real-world users. To address this limitation-and due to the prohibitive cost and slow pace of human annotation for video tasks-we introduce VideoAutoArena, an arena-style benchmark inspired by LMSYS Chatbot Arena&#39;s framework, designed to automatically assess LMMs&#39; video analysis abilities. VideoAutoArena utilizes user simulation to generate open-ended, adaptive questions that rigorously assess model performance in video understanding. The benchmark features an automated, scalable evaluation framework, incorporating a modified ELO Rating System for fair and continuous comparisons across multiple LMMs. To validate our automated judging system, we construct a &#39;gold standard&#39; using a carefully curated subset of human annotations, demonstrating that our arena strongly aligns with human judgment while maintaining scalability. Additionally, we introduce a fault-driven evolution strategy, progressively increasing question complexity to push models toward handling more challenging video analysis scenarios. Experimental results demonstrate that VideoAutoArena effectively differentiates among state-of-the-art LMMs, providing insights into model strengths and areas for improvement. To further streamline our evaluation, we introduce VideoAutoBench as an auxiliary benchmark, where human annotators label winners in a subset of VideoAutoArena battles. We use GPT-4o as a judge to compare responses against these human-validated answers. Together, VideoAutoArena and VideoAutoBench offer a cost-effective, and scalable framework for evaluating LMMs in user-centric video analysis. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13281v1-abstract-full').style.display = 'none'; document.getElementById('2411.13281v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Project Page: https://videoautoarena.github.io/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.12913">arXiv:2411.12913</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.12913">pdf</a>, <a href="https://arxiv.org/format/2411.12913">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> MLDGG: Meta-Learning for Domain Generalization on Graphs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Tian%2C+Q">Qin Tian</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+C">Chen Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Shao%2C+M">Minglai Shao</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+W">Wenjun Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Lin%2C+Y">Yujie Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Dong Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.12913v1-abstract-short" style="display: inline;"> Domain generalization on graphs aims to develop models with robust generalization capabilities, ensuring effective performance on the testing set despite disparities between testing and training distributions. However, existing methods often rely on static encoders directly applied to the target domain, constraining its flexible adaptability. In contrast to conventional methodologies, which concen&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12913v1-abstract-full').style.display = 'inline'; document.getElementById('2411.12913v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.12913v1-abstract-full" style="display: none;"> Domain generalization on graphs aims to develop models with robust generalization capabilities, ensuring effective performance on the testing set despite disparities between testing and training distributions. However, existing methods often rely on static encoders directly applied to the target domain, constraining its flexible adaptability. In contrast to conventional methodologies, which concentrate on developing specific generalized models, our framework, MLDGG, endeavors to achieve adaptable generalization across diverse domains by integrating cross-multi-domain meta-learning with structure learning and semantic identification. Initially, it introduces a generalized structure learner to mitigate the adverse effects of task-unrelated edges, enhancing the comprehensiveness of representations learned by Graph Neural Networks (GNNs) while capturing shared structural information across domains. Subsequently, a representation learner is designed to disentangle domain-invariant semantic and domain-specific variation information in node embedding by leveraging causal reasoning for semantic identification, further enhancing generalization. In the context of meta-learning, meta-parameters for both learners are optimized to facilitate knowledge transfer and enable effective adaptation to graphs through fine-tuning within the target domains, where target graphs are inaccessible during training. Our empirical results demonstrate that MLDGG surpasses baseline methods, showcasing its effectiveness in three different distribution shift settings. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12913v1-abstract-full').style.display = 'none'; document.getElementById('2411.12913v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted in KDD 2025 (research track)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.12592">arXiv:2411.12592</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.12592">pdf</a>, <a href="https://arxiv.org/format/2411.12592">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> SPARS3R: Semantic Prior Alignment and Regularization for Sparse 3D Reconstruction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Tang%2C+Y">Yutao Tang</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+Y">Yuxiang Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Deming Li</a>, <a href="/search/cs?searchtype=author&amp;query=Peng%2C+C">Cheng Peng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.12592v1-abstract-short" style="display: inline;"> Recent efforts in Gaussian-Splat-based Novel View Synthesis can achieve photorealistic rendering; however, such capability is limited in sparse-view scenarios due to sparse initialization and over-fitting floaters. Recent progress in depth estimation and alignment can provide dense point cloud with few views; however, the resulting pose accuracy is suboptimal. In this work, we present SPARS3R, whi&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12592v1-abstract-full').style.display = 'inline'; document.getElementById('2411.12592v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.12592v1-abstract-full" style="display: none;"> Recent efforts in Gaussian-Splat-based Novel View Synthesis can achieve photorealistic rendering; however, such capability is limited in sparse-view scenarios due to sparse initialization and over-fitting floaters. Recent progress in depth estimation and alignment can provide dense point cloud with few views; however, the resulting pose accuracy is suboptimal. In this work, we present SPARS3R, which combines the advantages of accurate pose estimation from Structure-from-Motion and dense point cloud from depth estimation. To this end, SPARS3R first performs a Global Fusion Alignment process that maps a prior dense point cloud to a sparse point cloud from Structure-from-Motion based on triangulated correspondences. RANSAC is applied during this process to distinguish inliers and outliers. SPARS3R then performs a second, Semantic Outlier Alignment step, which extracts semantically coherent regions around the outliers and performs local alignment in these regions. Along with several improvements in the evaluation process, we demonstrate that SPARS3R can achieve photorealistic rendering with sparse images and significantly outperforms existing approaches. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12592v1-abstract-full').style.display = 'none'; document.getElementById('2411.12592v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.12271">arXiv:2411.12271</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.12271">pdf</a>, <a href="https://arxiv.org/format/2411.12271">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Logic in Computer Science">cs.LO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> SMT-Layout: A MaxSMT-based Approach Supporting Real-time Interaction of Real-world GUI Layout </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+B">Bohan Li</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Dawei Li</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+M">Ming Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Cai%2C+S">Shaowei Cai</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.12271v1-abstract-short" style="display: inline;"> Leveraging the flexible expressive ability of (Max)SMT and the powerful solving ability of SMT solvers, we propose a novel layout model named SMT-Layout. SMT-Layout is the first constraint-based layout model that can support real-time interaction for real-world GUI layout adapting to various screen sizes with only one specification. Previous works neglect the hierarchy information among widgets an&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12271v1-abstract-full').style.display = 'inline'; document.getElementById('2411.12271v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.12271v1-abstract-full" style="display: none;"> Leveraging the flexible expressive ability of (Max)SMT and the powerful solving ability of SMT solvers, we propose a novel layout model named SMT-Layout. SMT-Layout is the first constraint-based layout model that can support real-time interaction for real-world GUI layout adapting to various screen sizes with only one specification. Previous works neglect the hierarchy information among widgets and thus cannot exploit the reasoning ability of solvers. For the first time, we introduce Boolean variables to encode the hierarchy relationship, boosting the reasoning ability of SMT solvers. The workflow is divided into two stages. At the development end, two novel preprocessing methods are proposed to simplify constraints and extract useful information in advance, easing the solving burden. After deploying constraints to the terminal end, SMT solvers are applied to solve constraints incrementally. Besides mainstream SMT solvers, a local search solver is customized to this scenario. Experiments show that SMT-Layout can support millisecond-level interaction for real-world layouts, even on devices with low computing power and rigorous memory limitations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12271v1-abstract-full').style.display = 'none'; document.getElementById('2411.12271v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.11874">arXiv:2411.11874</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.11874">pdf</a>, <a href="https://arxiv.org/format/2411.11874">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Personalized Continual EEG Decoding Framework for Knowledge Retention and Transfer </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Dan Li</a>, <a href="/search/cs?searchtype=author&amp;query=Shin%2C+H">Hye-Bin Shin</a>, <a href="/search/cs?searchtype=author&amp;query=Yin%2C+K">Kang Yin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.11874v1-abstract-short" style="display: inline;"> The significant inter-subject variability in electroencephalogram (EEG) signals often leads to knowledge being overwritten as new tasks are introduced in continual EEG decoding. While retraining on the entire dataset with each new input can prevent forgetting, this approach incurs high computational costs. An ideal brain-computer interface (BCI) model should continuously learn new information with&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11874v1-abstract-full').style.display = 'inline'; document.getElementById('2411.11874v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.11874v1-abstract-full" style="display: none;"> The significant inter-subject variability in electroencephalogram (EEG) signals often leads to knowledge being overwritten as new tasks are introduced in continual EEG decoding. While retraining on the entire dataset with each new input can prevent forgetting, this approach incurs high computational costs. An ideal brain-computer interface (BCI) model should continuously learn new information without retraining from scratch, thus reducing these costs. Most transfer learning models rely on large source-domain datasets for pre-training, yet data availability is frequently limited in real-world applications due to privacy concerns. Furthermore, such models are prone to catastrophic forgetting in continual EEG decoding tasks. To address these challenges, we propose a personalized subject-incremental learning (SIL) framework for continual EEG decoding that integrates Euclidean Alignment for fast domain adaptation, an exemplar replay mechanism to retain prior knowledge, and reservoir sampling-based memory management to handle memory constraints in long-term learning. Validated on the OpenBMI dataset with 54 subjects, our framework effectively balances knowledge retention with classification performance in continual MI-EEG tasks, offering a scalable solution for real-world BCI applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11874v1-abstract-full').style.display = 'none'; document.getElementById('2411.11874v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.10914">arXiv:2411.10914</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.10914">pdf</a>, <a href="https://arxiv.org/format/2411.10914">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> BPO: Towards Balanced Preference Optimization between Knowledge Breadth and Depth in Alignment </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wang%2C+S">Sizhe Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Tong%2C+Y">Yongqi Tong</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+H">Hengyuan Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Dawei Li</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+X">Xin Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+T">Tianlong Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.10914v1-abstract-short" style="display: inline;"> Reinforcement Learning with Human Feedback (RLHF) is the key to the success of large language models (LLMs) in recent years. In this work, we first introduce the concepts of knowledge breadth and knowledge depth, which measure the comprehensiveness and depth of an LLM or knowledge source respectively. We reveal that the imbalance in the number of prompts and responses can lead to a potential dispa&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10914v1-abstract-full').style.display = 'inline'; document.getElementById('2411.10914v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.10914v1-abstract-full" style="display: none;"> Reinforcement Learning with Human Feedback (RLHF) is the key to the success of large language models (LLMs) in recent years. In this work, we first introduce the concepts of knowledge breadth and knowledge depth, which measure the comprehensiveness and depth of an LLM or knowledge source respectively. We reveal that the imbalance in the number of prompts and responses can lead to a potential disparity in breadth and depth learning within alignment tuning datasets by showing that even a simple uniform method for balancing the number of instructions and responses can lead to significant improvements. Building on this, we further propose Balanced Preference Optimization (BPO), designed to dynamically augment the knowledge depth of each sample. BPO is motivated by the observation that the usefulness of knowledge varies across samples, necessitating tailored learning of knowledge depth. To achieve this, we introduce gradient-based clustering, estimating the knowledge informativeness and usefulness of each augmented sample based on the model&#39;s optimization direction. Our experimental results across various benchmarks demonstrate that BPO outperforms other baseline methods in alignment tuning while maintaining training efficiency. Furthermore, we conduct a detailed analysis of each component of BPO, providing guidelines for future research in preference data optimization. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10914v1-abstract-full').style.display = 'none'; document.getElementById('2411.10914v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.10449">arXiv:2411.10449</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.10449">pdf</a>, <a href="https://arxiv.org/format/2411.10449">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Love in Action: Gamifying Public Video Cameras for Fostering Social Relationships in Real World </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Z">Zhang Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Da Li</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+G">Geng Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Y">Yaoning Li</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+X">Xiaobing Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+L">Liang Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.10449v1-abstract-short" style="display: inline;"> In this paper, we create &#34;Love in Action&#34; (LIA), a body language-based social game utilizing video cameras installed in public spaces to enhance social relationships in real-world. In the game, participants assume dual roles, i.e., requesters, who issue social requests, and performers, who respond social requests through performing specified body languages. To mediate the communication between par&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10449v1-abstract-full').style.display = 'inline'; document.getElementById('2411.10449v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.10449v1-abstract-full" style="display: none;"> In this paper, we create &#34;Love in Action&#34; (LIA), a body language-based social game utilizing video cameras installed in public spaces to enhance social relationships in real-world. In the game, participants assume dual roles, i.e., requesters, who issue social requests, and performers, who respond social requests through performing specified body languages. To mediate the communication between participants, we build an AI-enhanced video analysis system incorporating multiple visual analysis modules like person detection, attribute recognition, and action recognition, to assess the performer&#39;s body language quality. A two-week field study involving 27 participants shows significant improvements in their social friendships, as indicated by self-reported questionnaires. Moreover, user experiences are investigated to highlight the potential of public video cameras as a novel communication medium for socializing in public spaces. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10449v1-abstract-full').style.display = 'none'; document.getElementById('2411.10449v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">accepted as a main track paper by EAI-ArtsIT 2024</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 14J60 (Primary) 14F05; 14J26 (Secondary) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.10003">arXiv:2411.10003</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.10003">pdf</a>, <a href="https://arxiv.org/format/2411.10003">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> Pro-Prophet: A Systematic Load Balancing Method for Efficient Parallel Training of Large-scale MoE Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wang%2C+W">Wei Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Lai%2C+Z">Zhiquan Lai</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+S">Shengwei Li</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+W">Weijie Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Ge%2C+K">Keshi Ge</a>, <a href="/search/cs?searchtype=author&amp;query=Shen%2C+A">Ao Shen</a>, <a href="/search/cs?searchtype=author&amp;query=Su%2C+H">Huayou Su</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Dongsheng Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.10003v2-abstract-short" style="display: inline;"> The size of deep learning models has been increasing to enhance model quality. The linear increase in training computation budget with model size means that training an extremely large-scale model is exceedingly time-consuming. Recently, the Mixture of Expert (MoE) has drawn significant attention as it can scale models to extra-large sizes with a stable computation budget. However, inefficient dis&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10003v2-abstract-full').style.display = 'inline'; document.getElementById('2411.10003v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.10003v2-abstract-full" style="display: none;"> The size of deep learning models has been increasing to enhance model quality. The linear increase in training computation budget with model size means that training an extremely large-scale model is exceedingly time-consuming. Recently, the Mixture of Expert (MoE) has drawn significant attention as it can scale models to extra-large sizes with a stable computation budget. However, inefficient distributed training of large-scale MoE models hinders their broader application. Specifically, a considerable dynamic load imbalance occurs among devices during training, significantly reducing throughput. Several load-balancing works have been proposed to address the challenge. System-level solutions draw more attention for their hardware affinity and non-disruption of model convergence compared to algorithm-level ones. However, they are troubled by high communication costs and poor communication-computation overlapping. To address these challenges, we propose a systematic load-balancing method, Pro-Prophet, which consists of a planner and a scheduler for efficient parallel training of large-scale MoE models. To adapt to the dynamic load imbalance, we profile training statistics and use them to design Pro-Prophet. For lower communication volume, Pro-Prophet planner determines a series of lightweight load-balancing strategies and efficiently searches for a communication-efficient one for training based on the statistics. For sufficient overlapping of communication and computation, Pro-Prophet scheduler schedules the data-dependent operations based on the statistics and operation features, further improving the training throughput. Experimental results indicate that Pro-Prophet achieves up to 2.66x speedup compared to Deepspeed-MoE and FasterMoE. Additionally, Pro-Prophet achieves a load-balancing enhancement of up to 11.01x when compared to FasterMoE. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10003v2-abstract-full').style.display = 'none'; document.getElementById('2411.10003v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.09945">arXiv:2411.09945</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.09945">pdf</a>, <a href="https://arxiv.org/format/2411.09945">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> TEESlice: Protecting Sensitive Neural Network Models in Trusted Execution Environments When Attackers have Pre-Trained Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Ding Li</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Z">Ziqi Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Yao%2C+M">Mengyu Yao</a>, <a href="/search/cs?searchtype=author&amp;query=Cai%2C+Y">Yifeng Cai</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+Y">Yao Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+X">Xiangqun Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.09945v1-abstract-short" style="display: inline;"> Trusted Execution Environments (TEE) are used to safeguard on-device models. However, directly employing TEEs to secure the entire DNN model is challenging due to the limited computational speed. Utilizing GPU can accelerate DNN&#39;s computation speed but commercial widely-available GPUs usually lack security protection. To this end, scholars introduce TSDP, a method that protects privacy-sensitive w&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09945v1-abstract-full').style.display = 'inline'; document.getElementById('2411.09945v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.09945v1-abstract-full" style="display: none;"> Trusted Execution Environments (TEE) are used to safeguard on-device models. However, directly employing TEEs to secure the entire DNN model is challenging due to the limited computational speed. Utilizing GPU can accelerate DNN&#39;s computation speed but commercial widely-available GPUs usually lack security protection. To this end, scholars introduce TSDP, a method that protects privacy-sensitive weights within TEEs and offloads insensitive weights to GPUs. Nevertheless, current methods do not consider the presence of a knowledgeable adversary who can access abundant publicly available pre-trained models and datasets. This paper investigates the security of existing methods against such a knowledgeable adversary and reveals their inability to fulfill their security promises. Consequently, we introduce a novel partition before training strategy, which effectively separates privacy-sensitive weights from other components of the model. Our evaluation demonstrates that our approach can offer full model protection with a computational cost reduced by a factor of 10. In addition to traditional CNN models, we also demonstrate the scalability to large language models. Our approach can compress the private functionalities of the large language model to lightweight slices and achieve the same level of protection as the shielding-whole-model baseline. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09945v1-abstract-full').style.display = 'none'; document.getElementById('2411.09945v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by TOSEM. Extended version of the S&amp;P24 paper (arXiv:2310.07152)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.06931">arXiv:2411.06931</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.06931">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Soft Condensed Matter">cond-mat.soft</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> 3D Printing of Near-Ambient Responsive Liquid Crystal Elastomers with Enhanced Nematic Order and Pluralized Transformation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Dongxiao Li</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+Y">Yuxuan Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+X">Xingjian Li</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+X">Xingxiang Li</a>, <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+Z">Zhengqing Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+B">Boxi Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Nong%2C+S">Shutong Nong</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+J">Jiyang Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Pan%2C+T">Tingrui Pan</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+W">Weihua Li</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+S">Shiwu Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+M">Mujun Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.06931v1-abstract-short" style="display: inline;"> Liquid Crystal Elastomers with near-ambient temperature-responsiveness (NAT-LCEs) have been extensively studied for building bio-compatible, low-power consumption devices and robotics. However, conventional manufacturing methods face limitations in programmability (e.g., molding) or low nematic order (e.g., DIW printing). Here, a hybrid cooling strategy is proposed for programmable 3D printing of&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06931v1-abstract-full').style.display = 'inline'; document.getElementById('2411.06931v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.06931v1-abstract-full" style="display: none;"> Liquid Crystal Elastomers with near-ambient temperature-responsiveness (NAT-LCEs) have been extensively studied for building bio-compatible, low-power consumption devices and robotics. However, conventional manufacturing methods face limitations in programmability (e.g., molding) or low nematic order (e.g., DIW printing). Here, a hybrid cooling strategy is proposed for programmable 3D printing of NAT-LCEs with enhanced nematic order, intricate shape forming, and morphing capability. By integrating a low-temperature nozzle and a cooling platform into a 3D printer, the resulting temperature field synergistically facilitates mesogen alignment during extrusion and disruption-free UV cross-linking. This method achieves a nematic order 3000% higher than those fabricated using traditional room temperature 3D printing. Enabled by shifting of transition temperature during hybrid cooling printing, printed sheets spontaneously turn into 3D structures after release from the platform, exhibiting bidirectional deformation with heating and cooling. By adjusting the nozzle and plate temperatures, NAT-LCEs with graded properties can be fabricated for intricate shape morphing. A wristband system with enhanced heart rate monitoring is also developed based on 3D-printed NAT-LCE. Our method may open new possibilities for soft robotics, biomedical devices, and wearable electronics. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06931v1-abstract-full').style.display = 'none'; document.getElementById('2411.06931v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.06659">arXiv:2411.06659</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.06659">pdf</a>, <a href="https://arxiv.org/format/2411.06659">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> An Efficient Memory Module for Graph Few-Shot Class-Incremental Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Dong Li</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+A">Aijia Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Gao%2C+J">Junqi Gao</a>, <a href="/search/cs?searchtype=author&amp;query=Qi%2C+B">Biqing Qi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.06659v1-abstract-short" style="display: inline;"> Incremental graph learning has gained significant attention for its ability to address the catastrophic forgetting problem in graph representation learning. However, traditional methods often rely on a large number of labels for node classification, which is impractical in real-world applications. This makes few-shot incremental learning on graphs a pressing need. Current methods typically require&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06659v1-abstract-full').style.display = 'inline'; document.getElementById('2411.06659v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.06659v1-abstract-full" style="display: none;"> Incremental graph learning has gained significant attention for its ability to address the catastrophic forgetting problem in graph representation learning. However, traditional methods often rely on a large number of labels for node classification, which is impractical in real-world applications. This makes few-shot incremental learning on graphs a pressing need. Current methods typically require extensive training samples from meta-learning to build memory and perform intensive fine-tuning of GNN parameters, leading to high memory consumption and potential loss of previously learned knowledge. To tackle these challenges, we introduce Mecoin, an efficient method for building and maintaining memory. Mecoin employs Structured Memory Units to cache prototypes of learned categories, as well as Memory Construction Modules to update these prototypes for new categories through interactions between the nodes and the cached prototypes. Additionally, we have designed a Memory Representation Adaptation Module to store probabilities associated with each class prototype, reducing the need for parameter fine-tuning and lowering the forgetting rate. When a sample matches its corresponding class prototype, the relevant probabilities are retrieved from the MRaM. Knowledge is then distilled back into the GNN through a Graph Knowledge Distillation Module, preserving the model&#39;s memory. We analyze the effectiveness of Mecoin in terms of generalization error and explore the impact of different distillation strategies on model performance through experiments and VC-dimension analysis. Compared to other related works, Mecoin shows superior performance in accuracy and forgetting rate. Our code is publicly available on the https://github.com/Arvin0313/Mecoin-GFSCIL.git . <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06659v1-abstract-full').style.display = 'none'; document.getElementById('2411.06659v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages, 6 figures, 38th Conference on Neural Information Processing Systems, 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.06481">arXiv:2411.06481</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.06481">pdf</a>, <a href="https://arxiv.org/format/2411.06481">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> KMM: Key Frame Mask Mamba for Extended Motion Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Z">Zeyu Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Gao%2C+H">Hang Gao</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+A">Akide Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Q">Qi Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+F">Feng Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Yiran Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Danning Li</a>, <a href="/search/cs?searchtype=author&amp;query=Tang%2C+H">Hao Tang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.06481v1-abstract-short" style="display: inline;"> Human motion generation is a cut-edge area of research in generative computer vision, with promising applications in video creation, game development, and robotic manipulation. The recent Mamba architecture shows promising results in efficiently modeling long and complex sequences, yet two significant challenges remain: Firstly, directly applying Mamba to extended motion generation is ineffective,&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06481v1-abstract-full').style.display = 'inline'; document.getElementById('2411.06481v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.06481v1-abstract-full" style="display: none;"> Human motion generation is a cut-edge area of research in generative computer vision, with promising applications in video creation, game development, and robotic manipulation. The recent Mamba architecture shows promising results in efficiently modeling long and complex sequences, yet two significant challenges remain: Firstly, directly applying Mamba to extended motion generation is ineffective, as the limited capacity of the implicit memory leads to memory decay. Secondly, Mamba struggles with multimodal fusion compared to Transformers, and lack alignment with textual queries, often confusing directions (left or right) or omitting parts of longer text queries. To address these challenges, our paper presents three key contributions: Firstly, we introduce KMM, a novel architecture featuring Key frame Masking Modeling, designed to enhance Mamba&#39;s focus on key actions in motion segments. This approach addresses the memory decay problem and represents a pioneering method in customizing strategic frame-level masking in SSMs. Additionally, we designed a contrastive learning paradigm for addressing the multimodal fusion problem in Mamba and improving the motion-text alignment. Finally, we conducted extensive experiments on the go-to dataset, BABEL, achieving state-of-the-art performance with a reduction of more than 57% in FID and 70% parameters compared to previous state-of-the-art methods. See project website: https://steve-zeyu-zhang.github.io/KMM <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06481v1-abstract-full').style.display = 'none'; document.getElementById('2411.06481v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.06213">arXiv:2411.06213</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.06213">pdf</a>, <a href="https://arxiv.org/ps/2411.06213">ps</a>, <a href="https://arxiv.org/format/2411.06213">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Incorporating Human Explanations for Robust Hate Speech Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chen%2C+J+L">Jennifer L. Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Ladhak%2C+F">Faisal Ladhak</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Daniel Li</a>, <a href="/search/cs?searchtype=author&amp;query=Elhadad%2C+N">No茅mie Elhadad</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.06213v1-abstract-short" style="display: inline;"> Given the black-box nature and complexity of large transformer language models (LM), concerns about generalizability and robustness present ethical implications for domains such as hate speech (HS) detection. Using the content rich Social Bias Frames dataset, containing human-annotated stereotypes, intent, and targeted groups, we develop a three stage analysis to evaluate if LMs faithfully assess&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06213v1-abstract-full').style.display = 'inline'; document.getElementById('2411.06213v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.06213v1-abstract-full" style="display: none;"> Given the black-box nature and complexity of large transformer language models (LM), concerns about generalizability and robustness present ethical implications for domains such as hate speech (HS) detection. Using the content rich Social Bias Frames dataset, containing human-annotated stereotypes, intent, and targeted groups, we develop a three stage analysis to evaluate if LMs faithfully assess hate speech. First, we observe the need for modeling contextually grounded stereotype intents to capture implicit semantic meaning. Next, we design a new task, Stereotype Intent Entailment (SIE), which encourages a model to contextually understand stereotype presence. Finally, through ablation tests and user studies, we find a SIE objective improves content understanding, but challenges remain in modeling implicit intent. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06213v1-abstract-full').style.display = 'none'; document.getElementById('2411.06213v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">2021 ACL Unimplicit Workshop</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.05614">arXiv:2411.05614</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.05614">pdf</a>, <a href="https://arxiv.org/format/2411.05614">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> Acceleration for Deep Reinforcement Learning using Parallel and Distributed Computing: A Survey </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Z">Zhihong Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+X">Xin Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Qiao%2C+P">Peng Qiao</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Dongsheng Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.05614v1-abstract-short" style="display: inline;"> Deep reinforcement learning has led to dramatic breakthroughs in the field of artificial intelligence for the past few years. As the amount of rollout experience data and the size of neural networks for deep reinforcement learning have grown continuously, handling the training process and reducing the time consumption using parallel and distributed computing is becoming an urgent and essential des&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05614v1-abstract-full').style.display = 'inline'; document.getElementById('2411.05614v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.05614v1-abstract-full" style="display: none;"> Deep reinforcement learning has led to dramatic breakthroughs in the field of artificial intelligence for the past few years. As the amount of rollout experience data and the size of neural networks for deep reinforcement learning have grown continuously, handling the training process and reducing the time consumption using parallel and distributed computing is becoming an urgent and essential desire. In this paper, we perform a broad and thorough investigation on training acceleration methodologies for deep reinforcement learning based on parallel and distributed computing, providing a comprehensive survey in this field with state-of-the-art methods and pointers to core references. In particular, a taxonomy of literature is provided, along with a discussion of emerging topics and open issues. This incorporates learning system architectures, simulation parallelism, computing parallelism, distributed synchronization mechanisms, and deep evolutionary reinforcement learning. Further, we compare 16 current open-source libraries and platforms with criteria of facilitating rapid development. Finally, we extrapolate future directions that deserve further research. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05614v1-abstract-full').style.display = 'none'; document.getElementById('2411.05614v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper has been accepted by ACM Computing Surveys</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.04075">arXiv:2411.04075</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.04075">pdf</a>, <a href="https://arxiv.org/format/2411.04075">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> M3SciQA: A Multi-Modal Multi-Document Scientific QA Benchmark for Evaluating Foundation Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+C">Chuhan Li</a>, <a href="/search/cs?searchtype=author&amp;query=Shangguan%2C+Z">Ziyao Shangguan</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+Y">Yilun Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Deyuan Li</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Y">Yixin Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Cohan%2C+A">Arman Cohan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.04075v1-abstract-short" style="display: inline;"> Existing benchmarks for evaluating foundation models mainly focus on single-document, text-only tasks. However, they often fail to fully capture the complexity of research workflows, which typically involve interpreting non-textual data and gathering information across multiple documents. To address this gap, we introduce M3SciQA, a multi-modal, multi-document scientific question answering benchma&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04075v1-abstract-full').style.display = 'inline'; document.getElementById('2411.04075v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.04075v1-abstract-full" style="display: none;"> Existing benchmarks for evaluating foundation models mainly focus on single-document, text-only tasks. However, they often fail to fully capture the complexity of research workflows, which typically involve interpreting non-textual data and gathering information across multiple documents. To address this gap, we introduce M3SciQA, a multi-modal, multi-document scientific question answering benchmark designed for a more comprehensive evaluation of foundation models. M3SciQA consists of 1,452 expert-annotated questions spanning 70 natural language processing paper clusters, where each cluster represents a primary paper along with all its cited documents, mirroring the workflow of comprehending a single paper by requiring multi-modal and multi-document data. With M3SciQA, we conduct a comprehensive evaluation of 18 foundation models. Our results indicate that current foundation models still significantly underperform compared to human experts in multi-modal information retrieval and in reasoning across multiple scientific documents. Additionally, we explore the implications of these findings for the future advancement of applying foundation models in multi-modal scientific literature analysis. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04075v1-abstract-full').style.display = 'none'; document.getElementById('2411.04075v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.03743">arXiv:2411.03743</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.03743">pdf</a>, <a href="https://arxiv.org/format/2411.03743">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> Automating Exploratory Proteomics Research via Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Ding%2C+N">Ning Ding</a>, <a href="/search/cs?searchtype=author&amp;query=Qu%2C+S">Shang Qu</a>, <a href="/search/cs?searchtype=author&amp;query=Xie%2C+L">Linhai Xie</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Y">Yifei Li</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Z">Zaoqu Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+K">Kaiyan Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Xiong%2C+Y">Yibai Xiong</a>, <a href="/search/cs?searchtype=author&amp;query=Zuo%2C+Y">Yuxin Zuo</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Z">Zhangren Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Hua%2C+E">Ermo Hua</a>, <a href="/search/cs?searchtype=author&amp;query=Lv%2C+X">Xingtai Lv</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+Y">Youbang Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Y">Yang Li</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Dong Li</a>, <a href="/search/cs?searchtype=author&amp;query=He%2C+F">Fuchu He</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+B">Bowen Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.03743v1-abstract-short" style="display: inline;"> With the development of artificial intelligence, its contribution to science is evolving from simulating a complex problem to automating entire research processes and producing novel discoveries. Achieving this advancement requires both specialized general models grounded in real-world scientific data and iterative, exploratory frameworks that mirror human scientific methodologies. In this paper,&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.03743v1-abstract-full').style.display = 'inline'; document.getElementById('2411.03743v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.03743v1-abstract-full" style="display: none;"> With the development of artificial intelligence, its contribution to science is evolving from simulating a complex problem to automating entire research processes and producing novel discoveries. Achieving this advancement requires both specialized general models grounded in real-world scientific data and iterative, exploratory frameworks that mirror human scientific methodologies. In this paper, we present PROTEUS, a fully automated system for scientific discovery from raw proteomics data. PROTEUS uses large language models (LLMs) to perform hierarchical planning, execute specialized bioinformatics tools, and iteratively refine analysis workflows to generate high-quality scientific hypotheses. The system takes proteomics datasets as input and produces a comprehensive set of research objectives, analysis results, and novel biological hypotheses without human intervention. We evaluated PROTEUS on 12 proteomics datasets collected from various biological samples (e.g. immune cells, tumors) and different sample types (single-cell and bulk), generating 191 scientific hypotheses. These were assessed using both automatic LLM-based scoring on 5 metrics and detailed reviews from human experts. Results demonstrate that PROTEUS consistently produces reliable, logically coherent results that align well with existing literature while also proposing novel, evaluable hypotheses. The system&#39;s flexible architecture facilitates seamless integration of diverse analysis tools and adaptation to different proteomics data types. By automating complex proteomics analysis workflows and hypothesis generation, PROTEUS has the potential to considerably accelerate the pace of scientific discovery in proteomics research, enabling researchers to efficiently explore large-scale datasets and uncover biological insights. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.03743v1-abstract-full').style.display = 'none'; document.getElementById('2411.03743v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.03641">arXiv:2411.03641</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.03641">pdf</a>, <a href="https://arxiv.org/format/2411.03641">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Methodology">stat.ME</span> </div> </div> <p class="title is-5 mathjax"> Constrained Multi-objective Bayesian Optimization through Optimistic Constraints Estimation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Diantong Li</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+F">Fengxue Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+C">Chong Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Y">Yuxin Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.03641v1-abstract-short" style="display: inline;"> Multi-objective Bayesian optimization has been widely adopted in scientific experiment design, including drug discovery and hyperparameter optimization. In practice, regulatory or safety concerns often impose additional thresholds on certain attributes of the experimental outcomes. Previous work has primarily focused on constrained single-objective optimization tasks or active search under constra&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.03641v1-abstract-full').style.display = 'inline'; document.getElementById('2411.03641v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.03641v1-abstract-full" style="display: none;"> Multi-objective Bayesian optimization has been widely adopted in scientific experiment design, including drug discovery and hyperparameter optimization. In practice, regulatory or safety concerns often impose additional thresholds on certain attributes of the experimental outcomes. Previous work has primarily focused on constrained single-objective optimization tasks or active search under constraints. We propose CMOBO, a sample-efficient constrained multi-objective Bayesian optimization algorithm that balances learning of the feasible region (defined on multiple unknowns) with multi-objective optimization within the feasible region in a principled manner. We provide both theoretical justification and empirical evidence, demonstrating the efficacy of our approach on various synthetic benchmarks and real-world applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.03641v1-abstract-full').style.display = 'none'; document.getElementById('2411.03641v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.03371">arXiv:2411.03371</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.03371">pdf</a>, <a href="https://arxiv.org/ps/2411.03371">ps</a>, <a href="https://arxiv.org/format/2411.03371">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> Blockchain-Based Multi-Path Mobile Access Point Selection for Secure 5G VANETs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Z">Zhiou Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+W">Weian Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+L">Li Li</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Dongyang Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.03371v1-abstract-short" style="display: inline;"> This letter presents a blockchain-based multi-path mobile access point (MAP) selection strategy for secure 5G vehicular ad-hoc networks (VANETs). The proposed method leverages blockchain technology for decentralized, transparent, and secure MAP selection, while the multi-path transmission strategy enhances network reliability and reduces communication delays. A trust-based attack detection mechani&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.03371v1-abstract-full').style.display = 'inline'; document.getElementById('2411.03371v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.03371v1-abstract-full" style="display: none;"> This letter presents a blockchain-based multi-path mobile access point (MAP) selection strategy for secure 5G vehicular ad-hoc networks (VANETs). The proposed method leverages blockchain technology for decentralized, transparent, and secure MAP selection, while the multi-path transmission strategy enhances network reliability and reduces communication delays. A trust-based attack detection mechanism is integrated to ensure network security. Simulation results demonstrate that the proposed algorithm reduces both handover frequency and average communication delay by over 80%, and successfully identifies and excludes more than 95% of Sybil nodes, ensuring reliable and secure communication in highly dynamic vehicular environments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.03371v1-abstract-full').style.display = 'none'; document.getElementById('2411.03371v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.03284">arXiv:2411.03284</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.03284">pdf</a>, <a href="https://arxiv.org/format/2411.03284">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> </div> </div> <p class="title is-5 mathjax"> SMoA: Improving Multi-agent Large Language Models with Sparse Mixture-of-Agents </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Dawei Li</a>, <a href="/search/cs?searchtype=author&amp;query=Tan%2C+Z">Zhen Tan</a>, <a href="/search/cs?searchtype=author&amp;query=Qian%2C+P">Peijia Qian</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Y">Yifan Li</a>, <a href="/search/cs?searchtype=author&amp;query=Chaudhary%2C+K+S">Kumar Satvik Chaudhary</a>, <a href="/search/cs?searchtype=author&amp;query=Hu%2C+L">Lijie Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Shen%2C+J">Jiayi Shen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.03284v1-abstract-short" style="display: inline;"> While multi-agent systems have been shown to significantly enhance the performance of Large Language Models (LLMs) across various tasks and applications, the dense interaction between scaling agents potentially hampers their efficiency and diversity. To address these challenges, we draw inspiration from the sparse mixture-of-agents (SMoE) and propose a sparse mixture-of-agents (SMoA) framework to&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.03284v1-abstract-full').style.display = 'inline'; document.getElementById('2411.03284v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.03284v1-abstract-full" style="display: none;"> While multi-agent systems have been shown to significantly enhance the performance of Large Language Models (LLMs) across various tasks and applications, the dense interaction between scaling agents potentially hampers their efficiency and diversity. To address these challenges, we draw inspiration from the sparse mixture-of-agents (SMoE) and propose a sparse mixture-of-agents (SMoA) framework to improve the efficiency and diversity of multi-agent LLMs. Unlike completely connected structures, SMoA introduces novel Response Selection and Early Stopping mechanisms to sparsify information flows among individual LLM agents, striking a balance between performance and efficiency. Additionally, inspired by the expert diversity principle in SMoE frameworks for workload balance between experts, we assign distinct role descriptions to each LLM agent, fostering diverse and divergent thinking. Extensive experiments on reasoning, alignment, and fairness benchmarks demonstrate that SMoA achieves performance comparable to traditional mixture-of-agents approaches but with significantly lower computational costs. Further analysis reveals that SMoA is more stable, has a greater capacity to scale, and offers considerable potential through hyper-parameter optimization. Code and data will be available at: https://github.com/David-Li0406/SMoA. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.03284v1-abstract-full').style.display = 'none'; document.getElementById('2411.03284v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Under Review</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.03027">arXiv:2411.03027</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.03027">pdf</a>, <a href="https://arxiv.org/format/2411.03027">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> </div> </div> <p class="title is-5 mathjax"> Adaptive Genetic Selection based Pinning Control with Asymmetric Coupling for Multi-Network Heterogeneous Vehicular Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Guo%2C+W">Weian Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Sha%2C+R">Ruizhi Sha</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+L">Li Li</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+L">Lun Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Dongyang Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.03027v1-abstract-short" style="display: inline;"> To alleviate computational load on RSUs and cloud platforms, reduce communication bandwidth requirements, and provide a more stable vehicular network service, this paper proposes an optimized pinning control approach for heterogeneous multi-network vehicular ad-hoc networks (VANETs). In such networks, vehicles participate in multiple task-specific networks with asymmetric coupling and dynamic topo&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.03027v1-abstract-full').style.display = 'inline'; document.getElementById('2411.03027v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.03027v1-abstract-full" style="display: none;"> To alleviate computational load on RSUs and cloud platforms, reduce communication bandwidth requirements, and provide a more stable vehicular network service, this paper proposes an optimized pinning control approach for heterogeneous multi-network vehicular ad-hoc networks (VANETs). In such networks, vehicles participate in multiple task-specific networks with asymmetric coupling and dynamic topologies. We first establish a rigorous theoretical foundation by proving the stability of pinning control strategies under both single and multi-network conditions, deriving sufficient stability conditions using Lyapunov theory and linear matrix inequalities (LMIs). Building on this theoretical groundwork, we propose an adaptive genetic algorithm tailored to select optimal pinning nodes, effectively balancing LMI constraints while prioritizing overlapping nodes to enhance control efficiency. Extensive simulations across various network scales demonstrate that our approach achieves rapid consensus with a reduced number of control nodes, particularly when leveraging network overlaps. This work provides a comprehensive solution for efficient control node selection in complex vehicular networks, offering practical implications for deploying large-scale intelligent transportation systems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.03027v1-abstract-full').style.display = 'none'; document.getElementById('2411.03027v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.02945">arXiv:2411.02945</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.02945">pdf</a>, <a href="https://arxiv.org/format/2411.02945">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Emerging Technologies">cs.ET</span> </div> </div> <p class="title is-5 mathjax"> Instant Resonance: Dual Strategy Enhances the Data Consensus Success Rate of Blockchain Threshold Signature Oracles </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Xian%2C+Y">Youquan Xian</a>, <a href="/search/cs?searchtype=author&amp;query=Zeng%2C+X">Xueying Zeng</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+C">Chunpei Li</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Dongcheng Li</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+P">Peng Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+P">Peng Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+X">Xianxian Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.02945v1-abstract-short" style="display: inline;"> With the rapid development of Decentralized Finance (DeFi) and Real-World Assets (RWA), the importance of blockchain oracles in real-time data acquisition has become increasingly prominent. Using cryptographic techniques, threshold signature oracles can achieve consensus on data from multiple nodes and provide corresponding proofs to ensure the credibility and security of the information. However,&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02945v1-abstract-full').style.display = 'inline'; document.getElementById('2411.02945v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.02945v1-abstract-full" style="display: none;"> With the rapid development of Decentralized Finance (DeFi) and Real-World Assets (RWA), the importance of blockchain oracles in real-time data acquisition has become increasingly prominent. Using cryptographic techniques, threshold signature oracles can achieve consensus on data from multiple nodes and provide corresponding proofs to ensure the credibility and security of the information. However, in real-time data acquisition, threshold signature methods face challenges such as data inconsistency and low success rates in heterogeneous environments, which limit their practical application potential. To address these issues, this paper proposes an innovative dual-strategy approach to enhance the success rate of data consensus in blockchain threshold signature oracles. Firstly, we introduce a Representative Enhanced Aggregation Strategy (REP-AG) that improves the representativeness of data submitted by nodes, ensuring consistency with data from other nodes, and thereby enhancing the usability of threshold signatures. Additionally, we present a Timing Optimization Strategy (TIM-OPT) that dynamically adjusts the timing of nodes&#39; access to data sources to maximize consensus success rates. Experimental results indicate that REP-AG improves the aggregation success rate by approximately 56.6\% compared to the optimal baseline, while the implementation of TIM-OPT leads to an average increase of approximately 32.9\% in consensus success rates across all scenarios. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02945v1-abstract-full').style.display = 'none'; document.getElementById('2411.02945v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Submitted to FGCS</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.02183">arXiv:2411.02183</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.02183">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Physics and Society">physics.soc-ph</span> </div> </div> <p class="title is-5 mathjax"> Vehicles, Pedestrians, and E-bikes: a Three-party Game at Right-turn-on-red Crossroads Revealing the Dual and Irrational Role of E-bikes that Risks Traffic Safety </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+G">Gangcheng Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Shu%2C+Y">Yeshuo Shu</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+K">Keyi Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Yuxuan Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Donghang Li</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+L">Liyan Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.02183v1-abstract-short" style="display: inline;"> The widespread use of e-bikes has facilitated short-distance travel yet led to confusion and safety problems in road traffic. This study focuses on the dual characteristics of e-bikes in traffic conflicts: they resemble pedestrians when interacting with motor vehicles and behave like motor vehicles when in conflict with pedestrians, which raises the right of way concerns when potential conflicts a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02183v1-abstract-full').style.display = 'inline'; document.getElementById('2411.02183v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.02183v1-abstract-full" style="display: none;"> The widespread use of e-bikes has facilitated short-distance travel yet led to confusion and safety problems in road traffic. This study focuses on the dual characteristics of e-bikes in traffic conflicts: they resemble pedestrians when interacting with motor vehicles and behave like motor vehicles when in conflict with pedestrians, which raises the right of way concerns when potential conflicts are at stake. Using the Quantal Response Equilibrium model, this research analyzes the behavioral choice differences of three groups of road users (vehicle-pedestrian, vehicle-e-bike, e-bike-pedestrian) at right-turn-on-red crossroads in right-turning lines and straight-going lines conflict scenarios. The results show that the behavior of e-bikes is more similar to that of motor vehicles than pedestrians overall, and their interactions with either pedestrians or motor vehicles do not establish a reasonable order, increasing the likelihood of confusion and conflict. In contrast, a mutual understanding has developed between motor vehicles and pedestrians, where motor vehicles tend to yield, and pedestrians tend to cross. By clarifying the game theoretical model and introducing the rationality parameter, this study precisely locates the role of e-bikes among road users, which provides a reliable theoretical basis for optimizing traffic regulations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02183v1-abstract-full').style.display = 'none'; document.getElementById('2411.02183v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 4 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.01843">arXiv:2411.01843</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.01843">pdf</a>, <a href="https://arxiv.org/format/2411.01843">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Dissertation: On the Theoretical Foundation of Model Comparison and Evaluation for Recommender System </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Dong Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.01843v1-abstract-short" style="display: inline;"> Recommender systems have become increasingly important with the rise of the web as a medium for electronic and business transactions. One of the key drivers of this technology is the ease with which users can provide feedback about their likes and dislikes through simple clicks of a mouse. This feedback is commonly collected in the form of ratings, but can also be inferred from a user&#39;s browsing a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.01843v1-abstract-full').style.display = 'inline'; document.getElementById('2411.01843v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.01843v1-abstract-full" style="display: none;"> Recommender systems have become increasingly important with the rise of the web as a medium for electronic and business transactions. One of the key drivers of this technology is the ease with which users can provide feedback about their likes and dislikes through simple clicks of a mouse. This feedback is commonly collected in the form of ratings, but can also be inferred from a user&#39;s browsing and purchasing history. Recommender systems utilize users&#39; historical data to infer customer interests and provide personalized recommendations. The basic principle of recommendations is that significant dependencies exist between user- and item-centric activity, which can be learned in a data-driven manner to make accurate predictions. Collaborative filtering is one family of recommendation algorithms that uses ratings from multiple users to predict missing ratings or uses binary click information to predict potential clicks. However, recommender systems can be more complex and incorporate auxiliary data such as content-based attributes, user interactions, and contextual information. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.01843v1-abstract-full').style.display = 'none'; document.getElementById('2411.01843v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">arXiv admin note: text overlap with arXiv:2312.08517</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.01191">arXiv:2411.01191</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.01191">pdf</a>, <a href="https://arxiv.org/format/2411.01191">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> </div> </div> <p class="title is-5 mathjax"> Prophet Secretary and Matching: the Significance of the Largest Item </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Z">Ziyun Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+Z">Zhiyi Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Dongchen Li</a>, <a href="/search/cs?searchtype=author&amp;query=Tang%2C+Z+G">Zhihao Gavin Tang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.01191v1-abstract-short" style="display: inline;"> The prophet secretary problem is a combination of the prophet inequality and the secretary problem, where elements are drawn from known independent distributions and arrive in uniformly random order. In this work, we design 1) a $0.688$-competitive algorithm, that breaks the $0.675$ barrier of blind strategies (Correa, Saona, Ziliotto, 2021), and 2) a $0.641$-competitive algorithm for the prophet&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.01191v1-abstract-full').style.display = 'inline'; document.getElementById('2411.01191v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.01191v1-abstract-full" style="display: none;"> The prophet secretary problem is a combination of the prophet inequality and the secretary problem, where elements are drawn from known independent distributions and arrive in uniformly random order. In this work, we design 1) a $0.688$-competitive algorithm, that breaks the $0.675$ barrier of blind strategies (Correa, Saona, Ziliotto, 2021), and 2) a $0.641$-competitive algorithm for the prophet secretary matching problem, that breaks the $1-1/e\approx 0.632$ barrier for the first time. Our second result also applies to the query-commit model of weighted stochastic matching and improves the state-of-the-art ratio (Derakhshan and Farhadi, 2023). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.01191v1-abstract-full').style.display = 'none'; document.getElementById('2411.01191v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.01033">arXiv:2411.01033</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.01033">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> Many-Objective Search-Based Coverage-Guided Automatic Test Generation for Deep Neural Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Dongcheng Li</a>, <a href="/search/cs?searchtype=author&amp;query=Wong%2C+W+E">W. Eric Wong</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+H">Hu Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+M">Man Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.01033v1-abstract-short" style="display: inline;"> To ensure the reliability of DNN systems and address the test generation problem for neural networks, this paper proposes a fuzzing test generation technique based on many-objective optimization algorithms. Traditional fuzz testing employs random search, leading to lower testing efficiency and tends to generate numerous invalid test cases. By utilizing many-objective optimization techniques, effec&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.01033v1-abstract-full').style.display = 'inline'; document.getElementById('2411.01033v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.01033v1-abstract-full" style="display: none;"> To ensure the reliability of DNN systems and address the test generation problem for neural networks, this paper proposes a fuzzing test generation technique based on many-objective optimization algorithms. Traditional fuzz testing employs random search, leading to lower testing efficiency and tends to generate numerous invalid test cases. By utilizing many-objective optimization techniques, effective test cases can be generated. To achieve high test coverage, this paper proposes several improvement strategies. The frequency-based fuzz sampling strategy assigns priorities based on the frequency of selection of initial data, avoiding the repetitive selection of the same data and enhancing the quality of initial data better than random sampling strategies. To address the issue that global search may yield test not satisfying semantic constraints, a local search strategy based on the Monte Carlo tree search is proposed to enhance the algorithm&#39;s local search capabilities. Furthermore, we improve the diversity of the population and the algorithm&#39;s global search capability by updating SPEA2&#39;s external archive based on a decomposition-based archiving strategy. To validate the effectiveness of the proposed approach, experiments were conducted on several public datasets and various neural network models. The results reveal that, compared to random and clustering-based sampling, the frequency-based fuzz sampling strategy provides a greater improvement in coverage rate in the later stages of iterations. On complex networks like VGG16, the improved SPEA2 algorithm increased the coverage rate by about 12% across several coverage metrics, and by approximately 40% on LeNet series networks. The experimental results also indicates that the newly generated test cases not only exhibit higher coverage rates but also generate adversarial samples that reveal model errors. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.01033v1-abstract-full').style.display = 'none'; document.getElementById('2411.01033v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.00822">arXiv:2411.00822</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.00822">pdf</a>, <a href="https://arxiv.org/format/2411.00822">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> EEG-based Multimodal Representation Learning for Emotion Recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yin%2C+K">Kang Yin</a>, <a href="/search/cs?searchtype=author&amp;query=Shin%2C+H">Hye-Bin Shin</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Dan Li</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+S">Seong-Whan Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.00822v1-abstract-short" style="display: inline;"> Multimodal learning has been a popular area of research, yet integrating electroencephalogram (EEG) data poses unique challenges due to its inherent variability and limited availability. In this paper, we introduce a novel multimodal framework that accommodates not only conventional modalities such as video, images, and audio, but also incorporates EEG data. Our framework is designed to flexibly h&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.00822v1-abstract-full').style.display = 'inline'; document.getElementById('2411.00822v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.00822v1-abstract-full" style="display: none;"> Multimodal learning has been a popular area of research, yet integrating electroencephalogram (EEG) data poses unique challenges due to its inherent variability and limited availability. In this paper, we introduce a novel multimodal framework that accommodates not only conventional modalities such as video, images, and audio, but also incorporates EEG data. Our framework is designed to flexibly handle varying input sizes, while dynamically adjusting attention to account for feature importance across modalities. We evaluate our approach on a recently introduced emotion recognition dataset that combines data from three modalities, making it an ideal testbed for multimodal learning. The experimental results provide a benchmark for the dataset and demonstrate the effectiveness of the proposed framework. This work highlights the potential of integrating EEG into multimodal systems, paving the way for more robust and comprehensive applications in emotion recognition and beyond. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.00822v1-abstract-full').style.display = 'none'; document.getElementById('2411.00822v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.23754">arXiv:2410.23754</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.23754">pdf</a>, <a href="https://arxiv.org/format/2410.23754">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neurons and Cognition">q-bio.NC</span> </div> </div> <p class="title is-5 mathjax"> RealMind: Zero-Shot EEG-Based Visual Decoding and Captioning Using Multi-Modal Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Dongyang Li</a>, <a href="/search/cs?searchtype=author&amp;query=Qin%2C+H">Haoyang Qin</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+M">Mingyang Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Cao%2C+Y">Yuang Cao</a>, <a href="/search/cs?searchtype=author&amp;query=Wei%2C+C">Chen Wei</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Q">Quanying Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.23754v1-abstract-short" style="display: inline;"> Despite significant progress in visual decoding with fMRI data, its high cost and low temporal resolution limit widespread applicability. To address these challenges, we introduce RealMind, a novel EEG-based visual decoding framework that leverages multi-modal models to efficiently interpret semantic information. By integrating semantic and geometric consistency learning, RealMind enhances feature&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.23754v1-abstract-full').style.display = 'inline'; document.getElementById('2410.23754v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.23754v1-abstract-full" style="display: none;"> Despite significant progress in visual decoding with fMRI data, its high cost and low temporal resolution limit widespread applicability. To address these challenges, we introduce RealMind, a novel EEG-based visual decoding framework that leverages multi-modal models to efficiently interpret semantic information. By integrating semantic and geometric consistency learning, RealMind enhances feature alignment, leading to improved decoding performance. Our framework achieves a 56.73\% Top-5 accuracy in a 200-way retrieval task and a 26.59\% BLEU-1 score in a 200-way visual captioning task, representing the first successful attempt at zero-shot visual captioning using EEG data. RealMind provides a robust, adaptable, and cost-effective alternative to fMRI-based methods, offering scalable solutions for EEG-based visual decoding in practical applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.23754v1-abstract-full').style.display = 'none'; document.getElementById('2410.23754v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.23004">arXiv:2410.23004</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.23004">pdf</a>, <a href="https://arxiv.org/format/2410.23004">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> DexGraspNet 2.0: Learning Generative Dexterous Grasping in Large-scale Synthetic Cluttered Scenes </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+J">Jialiang Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+H">Haoran Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Danshi Li</a>, <a href="/search/cs?searchtype=author&amp;query=Yu%2C+X">Xinqiang Yu</a>, <a href="/search/cs?searchtype=author&amp;query=Geng%2C+H">Haoran Geng</a>, <a href="/search/cs?searchtype=author&amp;query=Ding%2C+Y">Yufei Ding</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+J">Jiayi Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+H">He Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.23004v1-abstract-short" style="display: inline;"> Grasping in cluttered scenes remains highly challenging for dexterous hands due to the scarcity of data. To address this problem, we present a large-scale synthetic benchmark, encompassing 1319 objects, 8270 scenes, and 427 million grasps. Beyond benchmarking, we also propose a novel two-stage grasping method that learns efficiently from data by using a diffusion model that conditions on local geo&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.23004v1-abstract-full').style.display = 'inline'; document.getElementById('2410.23004v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.23004v1-abstract-full" style="display: none;"> Grasping in cluttered scenes remains highly challenging for dexterous hands due to the scarcity of data. To address this problem, we present a large-scale synthetic benchmark, encompassing 1319 objects, 8270 scenes, and 427 million grasps. Beyond benchmarking, we also propose a novel two-stage grasping method that learns efficiently from data by using a diffusion model that conditions on local geometry. Our proposed generative method outperforms all baselines in simulation experiments. Furthermore, with the aid of test-time-depth restoration, our method demonstrates zero-shot sim-to-real transfer, attaining 90.7% real-world dexterous grasping success rate in cluttered scenes. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.23004v1-abstract-full').style.display = 'none'; document.getElementById('2410.23004v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.21276">arXiv:2410.21276</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.21276">pdf</a>, <a href="https://arxiv.org/format/2410.21276">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> GPT-4o System Card </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=OpenAI"> OpenAI</a>, <a href="/search/cs?searchtype=author&amp;query=%3A"> :</a>, <a href="/search/cs?searchtype=author&amp;query=Hurst%2C+A">Aaron Hurst</a>, <a href="/search/cs?searchtype=author&amp;query=Lerer%2C+A">Adam Lerer</a>, <a href="/search/cs?searchtype=author&amp;query=Goucher%2C+A+P">Adam P. Goucher</a>, <a href="/search/cs?searchtype=author&amp;query=Perelman%2C+A">Adam Perelman</a>, <a href="/search/cs?searchtype=author&amp;query=Ramesh%2C+A">Aditya Ramesh</a>, <a href="/search/cs?searchtype=author&amp;query=Clark%2C+A">Aidan Clark</a>, <a href="/search/cs?searchtype=author&amp;query=Ostrow%2C+A">AJ Ostrow</a>, <a href="/search/cs?searchtype=author&amp;query=Welihinda%2C+A">Akila Welihinda</a>, <a href="/search/cs?searchtype=author&amp;query=Hayes%2C+A">Alan Hayes</a>, <a href="/search/cs?searchtype=author&amp;query=Radford%2C+A">Alec Radford</a>, <a href="/search/cs?searchtype=author&amp;query=M%C4%85dry%2C+A">Aleksander M膮dry</a>, <a href="/search/cs?searchtype=author&amp;query=Baker-Whitcomb%2C+A">Alex Baker-Whitcomb</a>, <a href="/search/cs?searchtype=author&amp;query=Beutel%2C+A">Alex Beutel</a>, <a href="/search/cs?searchtype=author&amp;query=Borzunov%2C+A">Alex Borzunov</a>, <a href="/search/cs?searchtype=author&amp;query=Carney%2C+A">Alex Carney</a>, <a href="/search/cs?searchtype=author&amp;query=Chow%2C+A">Alex Chow</a>, <a href="/search/cs?searchtype=author&amp;query=Kirillov%2C+A">Alex Kirillov</a>, <a href="/search/cs?searchtype=author&amp;query=Nichol%2C+A">Alex Nichol</a>, <a href="/search/cs?searchtype=author&amp;query=Paino%2C+A">Alex Paino</a>, <a href="/search/cs?searchtype=author&amp;query=Renzin%2C+A">Alex Renzin</a>, <a href="/search/cs?searchtype=author&amp;query=Passos%2C+A+T">Alex Tachard Passos</a>, <a href="/search/cs?searchtype=author&amp;query=Kirillov%2C+A">Alexander Kirillov</a>, <a href="/search/cs?searchtype=author&amp;query=Christakis%2C+A">Alexi Christakis</a> , et al. (395 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.21276v1-abstract-short" style="display: inline;"> GPT-4o is an autoregressive omni model that accepts as input any combination of text, audio, image, and video, and generates any combination of text, audio, and image outputs. It&#39;s trained end-to-end across text, vision, and audio, meaning all inputs and outputs are processed by the same neural network. GPT-4o can respond to audio inputs in as little as 232 milliseconds, with an average of 320 mil&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.21276v1-abstract-full').style.display = 'inline'; document.getElementById('2410.21276v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.21276v1-abstract-full" style="display: none;"> GPT-4o is an autoregressive omni model that accepts as input any combination of text, audio, image, and video, and generates any combination of text, audio, and image outputs. It&#39;s trained end-to-end across text, vision, and audio, meaning all inputs and outputs are processed by the same neural network. GPT-4o can respond to audio inputs in as little as 232 milliseconds, with an average of 320 milliseconds, which is similar to human response time in conversation. It matches GPT-4 Turbo performance on text in English and code, with significant improvement on text in non-English languages, while also being much faster and 50\% cheaper in the API. GPT-4o is especially better at vision and audio understanding compared to existing models. In line with our commitment to building AI safely and consistent with our voluntary commitments to the White House, we are sharing the GPT-4o System Card, which includes our Preparedness Framework evaluations. In this System Card, we provide a detailed look at GPT-4o&#39;s capabilities, limitations, and safety evaluations across multiple categories, focusing on speech-to-speech while also evaluating text and image capabilities, and measures we&#39;ve implemented to ensure the model is safe and aligned. We also include third-party assessments on dangerous capabilities, as well as discussion of potential societal impacts of GPT-4o&#39;s text and vision capabilities. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.21276v1-abstract-full').style.display = 'none'; document.getElementById('2410.21276v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.20406">arXiv:2410.20406</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.20406">pdf</a>, <a href="https://arxiv.org/format/2410.20406">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Point-PRC: A Prompt Learning Based Regulation Framework for Generalizable Point Cloud Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Sun%2C+H">Hongyu Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Ke%2C+Q">Qiuhong Ke</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Yongcai Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+W">Wang Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+K">Kang Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Deying Li</a>, <a href="/search/cs?searchtype=author&amp;query=Cai%2C+J">Jianfei Cai</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.20406v2-abstract-short" style="display: inline;"> This paper investigates the 3D domain generalization (3DDG) ability of large 3D models based on prevalent prompt learning. Recent works demonstrate the performances of 3D point cloud recognition can be boosted remarkably by parameter-efficient prompt tuning. However, we observe that the improvement on downstream tasks comes at the expense of a severe drop in 3D domain generalization. To resolve th&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.20406v2-abstract-full').style.display = 'inline'; document.getElementById('2410.20406v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.20406v2-abstract-full" style="display: none;"> This paper investigates the 3D domain generalization (3DDG) ability of large 3D models based on prevalent prompt learning. Recent works demonstrate the performances of 3D point cloud recognition can be boosted remarkably by parameter-efficient prompt tuning. However, we observe that the improvement on downstream tasks comes at the expense of a severe drop in 3D domain generalization. To resolve this challenge, we present a comprehensive regulation framework that allows the learnable prompts to actively interact with the well-learned general knowledge in large 3D models to maintain good generalization. Specifically, the proposed framework imposes multiple explicit constraints on the prompt learning trajectory by maximizing the mutual agreement between task-specific predictions and task-agnostic knowledge. We design the regulation framework as a plug-and-play module to embed into existing representative large 3D models. Surprisingly, our method not only realizes consistently increasing generalization ability but also enhances task-specific 3D recognition performances across various 3DDG benchmarks by a clear margin. Considering the lack of study and evaluation on 3DDG, we also create three new benchmarks, namely base-to-new, cross-dataset and few-shot generalization benchmarks, to enrich the field and inspire future research. Code and benchmarks are available at \url{https://github.com/auniquesun/Point-PRC}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.20406v2-abstract-full').style.display = 'none'; document.getElementById('2410.20406v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">accepted by NeurIPS 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.18005">arXiv:2410.18005</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.18005">pdf</a>, <a href="https://arxiv.org/format/2410.18005">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Numerical Analysis">math.NA</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> Random space-time sampling and reconstruction of sparse bandlimited graph diffusion field </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Huang%2C+L">Longxiu Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Dongyang Li</a>, <a href="/search/cs?searchtype=author&amp;query=Tang%2C+S">Sui Tang</a>, <a href="/search/cs?searchtype=author&amp;query=Yao%2C+Q">Qing Yao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.18005v1-abstract-short" style="display: inline;"> In this work, we investigate the sampling and reconstruction of spectrally $s$-sparse bandlimited graph signals governed by heat diffusion processes. We propose a random space-time sampling regime, referred to as {randomized} dynamical sampling, where a small subset of space-time nodes is randomly selected at each time step based on a probability distribution. To analyze the recovery problem, we e&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18005v1-abstract-full').style.display = 'inline'; document.getElementById('2410.18005v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.18005v1-abstract-full" style="display: none;"> In this work, we investigate the sampling and reconstruction of spectrally $s$-sparse bandlimited graph signals governed by heat diffusion processes. We propose a random space-time sampling regime, referred to as {randomized} dynamical sampling, where a small subset of space-time nodes is randomly selected at each time step based on a probability distribution. To analyze the recovery problem, we establish a rigorous mathematical framework by introducing the parameter \textit{the dynamic spectral graph weighted coherence}. This key parameter governs the number of space-time samples needed for stable recovery and extends the idea of variable density sampling to the context of dynamical systems. By optimizing the sampling probability distribution, we show that as few as $\mathcal{O}(s \log(k))$ space-time samples are sufficient for accurate reconstruction in optimal scenarios, where $k$ denotes the bandwidth of the signal. Our framework encompasses both static and dynamic cases, demonstrating a reduction in the number of spatial samples needed at each time step by exploiting temporal correlations. Furthermore, we provide a computationally efficient and robust algorithm for signal reconstruction. Numerical experiments validate our theoretical results and illustrate the practical efficacy of our proposed methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18005v1-abstract-full').style.display = 'none'; document.getElementById('2410.18005v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">23 pages</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 94A20; 94A12 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.17814">arXiv:2410.17814</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.17814">pdf</a>, <a href="https://arxiv.org/format/2410.17814">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Learning Lossless Compression for High Bit-Depth Volumetric Medical Image </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wang%2C+K">Kai Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Bai%2C+Y">Yuanchao Bai</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Daxin Li</a>, <a href="/search/cs?searchtype=author&amp;query=Zhai%2C+D">Deming Zhai</a>, <a href="/search/cs?searchtype=author&amp;query=Jiang%2C+J">Junjun Jiang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+X">Xianming Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.17814v1-abstract-short" style="display: inline;"> Recent advances in learning-based methods have markedly enhanced the capabilities of image compression. However, these methods struggle with high bit-depth volumetric medical images, facing issues such as degraded performance, increased memory demand, and reduced processing speed. To address these challenges, this paper presents the Bit-Division based Lossless Volumetric Image Compression (BD-LVIC&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.17814v1-abstract-full').style.display = 'inline'; document.getElementById('2410.17814v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.17814v1-abstract-full" style="display: none;"> Recent advances in learning-based methods have markedly enhanced the capabilities of image compression. However, these methods struggle with high bit-depth volumetric medical images, facing issues such as degraded performance, increased memory demand, and reduced processing speed. To address these challenges, this paper presents the Bit-Division based Lossless Volumetric Image Compression (BD-LVIC) framework, which is tailored for high bit-depth medical volume compression. The BD-LVIC framework skillfully divides the high bit-depth volume into two lower bit-depth segments: the Most Significant Bit-Volume (MSBV) and the Least Significant Bit-Volume (LSBV). The MSBV concentrates on the most significant bits of the volumetric medical image, capturing vital structural details in a compact manner. This reduction in complexity greatly improves compression efficiency using traditional codecs. Conversely, the LSBV deals with the least significant bits, which encapsulate intricate texture details. To compress this detailed information effectively, we introduce an effective learning-based compression model equipped with a Transformer-Based Feature Alignment Module, which exploits both intra-slice and inter-slice redundancies to accurately align features. Subsequently, a Parallel Autoregressive Coding Module merges these features to precisely estimate the probability distribution of the least significant bit-planes. Our extensive testing demonstrates that the BD-LVIC framework not only sets new performance benchmarks across various datasets but also maintains a competitive coding speed, highlighting its significant potential and practical utility in the realm of volumetric medical image compression. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.17814v1-abstract-full').style.display = 'none'; document.getElementById('2410.17814v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.17526">arXiv:2410.17526</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.17526">pdf</a>, <a href="https://arxiv.org/format/2410.17526">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> GDDA: Semantic OOD Detection on Graphs under Covariate Shift via Score-Based Diffusion Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=He%2C+Z">Zhixia He</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+C">Chen Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Shao%2C+M">Minglai Shao</a>, <a href="/search/cs?searchtype=author&amp;query=Lin%2C+Y">Yujie Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Dong Li</a>, <a href="/search/cs?searchtype=author&amp;query=Tian%2C+Q">Qin Tian</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.17526v1-abstract-short" style="display: inline;"> Out-of-distribution (OOD) detection poses a significant challenge for Graph Neural Networks (GNNs), particularly in open-world scenarios with varying distribution shifts. Most existing OOD detection methods on graphs primarily focus on identifying instances in test data domains caused by either semantic shifts (changes in data classes) or covariate shifts (changes in data features), while leaving&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.17526v1-abstract-full').style.display = 'inline'; document.getElementById('2410.17526v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.17526v1-abstract-full" style="display: none;"> Out-of-distribution (OOD) detection poses a significant challenge for Graph Neural Networks (GNNs), particularly in open-world scenarios with varying distribution shifts. Most existing OOD detection methods on graphs primarily focus on identifying instances in test data domains caused by either semantic shifts (changes in data classes) or covariate shifts (changes in data features), while leaving the simultaneous occurrence of both distribution shifts under-explored. In this work, we address both types of shifts simultaneously and introduce a novel challenge for OOD detection on graphs: graph-level semantic OOD detection under covariate shift. In this scenario, variations between the training and test domains result from the concurrent presence of both covariate and semantic shifts, where only graphs associated with unknown classes are identified as OOD samples (OODs). To tackle this challenge, we propose a novel two-phase framework called Graph Disentangled Diffusion Augmentation (GDDA). The first phase focuses on disentangling graph representations into domain-invariant semantic factors and domain-specific style factors. In the second phase, we introduce a novel distribution-shift-controlled score-based generative diffusion model that generates latent factors outside the training semantic and style spaces. Additionally, auxiliary pseudo-in-distribution (InD) and pseudo-OOD graph representations are employed to enhance the effectiveness of the energy-based semantic OOD detector. Extensive empirical studies on three benchmark datasets demonstrate that our approach outperforms state-of-the-art baselines. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.17526v1-abstract-full').style.display = 'none'; document.getElementById('2410.17526v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">4 pages, 6 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.17277">arXiv:2410.17277</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.17277">pdf</a>, <a href="https://arxiv.org/format/2410.17277">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> </div> </div> <p class="title is-5 mathjax"> A practical applicable quantum-classical hybrid ant colony algorithm for the NISQ era </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Qiu%2C+Q">Qian Qiu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+L">Liang Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+M">Mohan Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+Q">Qichun Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+X">Xiaogang Li</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Da-Chuang Li</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+H">Hua Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.17277v1-abstract-short" style="display: inline;"> Quantum ant colony optimization (QACO) has drew much attention since it combines the advantages of quantum computing and ant colony optimization (ACO) algorithm overcoming some limitations of the traditional ACO algorithm. However,due to the hardware resource limitations of currently available quantum computers, the practical application of the QACO is still not realized. In this paper, we develop&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.17277v1-abstract-full').style.display = 'inline'; document.getElementById('2410.17277v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.17277v1-abstract-full" style="display: none;"> Quantum ant colony optimization (QACO) has drew much attention since it combines the advantages of quantum computing and ant colony optimization (ACO) algorithm overcoming some limitations of the traditional ACO algorithm. However,due to the hardware resource limitations of currently available quantum computers, the practical application of the QACO is still not realized. In this paper, we developed a quantum-classical hybrid algorithm by combining the clustering algorithm with QACO algorithm.This extended QACO can handle large-scale optimization problems with currently available quantum computing resource. We have tested the effectiveness and performance of the extended QACO algorithm with the Travelling Salesman Problem (TSP) as benchmarks, and found the algorithm achieves better performance under multiple diverse datasets. In addition, we investigated the noise impact on the extended QACO and evaluated its operation possibility on current available noisy intermediate scale quantum(NISQ) devices. Our work shows that the combination of the clustering algorithm with QACO effectively improved its problem solving scale, which makes its practical application possible in current NISQ era of quantum computing. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.17277v1-abstract-full').style.display = 'none'; document.getElementById('2410.17277v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">arXiv admin note: substantial text overlap with arXiv:2403.00367</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.16942">arXiv:2410.16942</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.16942">pdf</a>, <a href="https://arxiv.org/format/2410.16942">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> DiP-GO: A Diffusion Pruner via Few-step Gradient Optimization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+H">Haowei Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Tang%2C+D">Dehua Tang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+J">Ji Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Lu%2C+M">Mingjie Lu</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+J">Jintu Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Peng%2C+J">Jinzhang Peng</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Dong Li</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Yu Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Jiang%2C+F">Fan Jiang</a>, <a href="/search/cs?searchtype=author&amp;query=Tian%2C+L">Lu Tian</a>, <a href="/search/cs?searchtype=author&amp;query=Tiwari%2C+S">Spandan Tiwari</a>, <a href="/search/cs?searchtype=author&amp;query=Sirasao%2C+A">Ashish Sirasao</a>, <a href="/search/cs?searchtype=author&amp;query=Yong%2C+J">Jun-Hai Yong</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+B">Bin Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Barsoum%2C+E">Emad Barsoum</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.16942v1-abstract-short" style="display: inline;"> Diffusion models have achieved remarkable progress in the field of image generation due to their outstanding capabilities. However, these models require substantial computing resources because of the multi-step denoising process during inference. While traditional pruning methods have been employed to optimize these models, the retraining process necessitates large-scale training datasets and exte&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.16942v1-abstract-full').style.display = 'inline'; document.getElementById('2410.16942v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.16942v1-abstract-full" style="display: none;"> Diffusion models have achieved remarkable progress in the field of image generation due to their outstanding capabilities. However, these models require substantial computing resources because of the multi-step denoising process during inference. While traditional pruning methods have been employed to optimize these models, the retraining process necessitates large-scale training datasets and extensive computational costs to maintain generalization ability, making it neither convenient nor efficient. Recent studies attempt to utilize the similarity of features across adjacent denoising stages to reduce computational costs through simple and static strategies. However, these strategies cannot fully harness the potential of the similar feature patterns across adjacent timesteps. In this work, we propose a novel pruning method that derives an efficient diffusion model via a more intelligent and differentiable pruner. At the core of our approach is casting the model pruning process into a SubNet search process. Specifically, we first introduce a SuperNet based on standard diffusion via adding some backup connections built upon the similar features. We then construct a plugin pruner network and design optimization losses to identify redundant computation. Finally, our method can identify an optimal SubNet through few-step gradient optimization and a simple post-processing procedure. We conduct extensive experiments on various diffusion models including Stable Diffusion series and DiTs. Our DiP-GO approach achieves 4.4 x speedup for SD-1.5 without any loss of accuracy, significantly outperforming the previous state-of-the-art methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.16942v1-abstract-full').style.display = 'none'; document.getElementById('2410.16942v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.16676">arXiv:2410.16676</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.16676">pdf</a>, <a href="https://arxiv.org/format/2410.16676">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Improving Causal Reasoning in Large Language Models: A Survey </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yu%2C+L">Longxuan Yu</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+D">Delin Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Xiong%2C+S">Siheng Xiong</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+Q">Qingyang Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Q">Qingzhen Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Dawei Li</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Z">Zhikai Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+X">Xiaoze Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Pan%2C+L">Liangming Pan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.16676v3-abstract-short" style="display: inline;"> Causal reasoning (CR) is a crucial aspect of intelligence, essential for problem-solving, decision-making, and understanding the world. While large language models (LLMs) can generate rationales for their outputs, their ability to reliably perform causal reasoning remains uncertain, often falling short in tasks requiring a deep understanding of causality. In this survey, we provide a comprehensive&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.16676v3-abstract-full').style.display = 'inline'; document.getElementById('2410.16676v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.16676v3-abstract-full" style="display: none;"> Causal reasoning (CR) is a crucial aspect of intelligence, essential for problem-solving, decision-making, and understanding the world. While large language models (LLMs) can generate rationales for their outputs, their ability to reliably perform causal reasoning remains uncertain, often falling short in tasks requiring a deep understanding of causality. In this survey, we provide a comprehensive review of research aimed at enhancing LLMs for causal reasoning. We categorize existing methods based on the role of LLMs: either as reasoning engines or as helpers providing knowledge or data to traditional CR methods, followed by a detailed discussion of the methodologies in each category. We then evaluate the performance of LLMs on various causal reasoning tasks, providing key findings and in-depth analysis. Finally, we provide insights from current studies and highlight promising directions for future research. We aim for this work to serve as a comprehensive resource, fostering further advancements in causal reasoning with LLMs. Resources are available at https://github.com/chendl02/Awesome-LLM-causal-reasoning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.16676v3-abstract-full').style.display = 'none'; document.getElementById('2410.16676v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.16668">arXiv:2410.16668</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.16668">pdf</a>, <a href="https://arxiv.org/format/2410.16668">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Satori: Towards Proactive AR Assistant with Belief-Desire-Intention User Modeling </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+C">Chenyi Li</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+G">Guande Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Chan%2C+G+Y">Gromit Yeuk-Yin Chan</a>, <a href="/search/cs?searchtype=author&amp;query=Turakhia%2C+D+G">Dishita G Turakhia</a>, <a href="/search/cs?searchtype=author&amp;query=Quispe%2C+S+C">Sonia Castelo Quispe</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Dong Li</a>, <a href="/search/cs?searchtype=author&amp;query=Welch%2C+L">Leslie Welch</a>, <a href="/search/cs?searchtype=author&amp;query=Silva%2C+C">Claudio Silva</a>, <a href="/search/cs?searchtype=author&amp;query=Qian%2C+J">Jing Qian</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.16668v1-abstract-short" style="display: inline;"> Augmented Reality assistance are increasingly popular for supporting users with tasks like assembly and cooking. However, current practice typically provide reactive responses initialized from user requests, lacking consideration of rich contextual and user-specific information. To address this limitation, we propose a novel AR assistance system, Satori, that models both user states and environmen&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.16668v1-abstract-full').style.display = 'inline'; document.getElementById('2410.16668v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.16668v1-abstract-full" style="display: none;"> Augmented Reality assistance are increasingly popular for supporting users with tasks like assembly and cooking. However, current practice typically provide reactive responses initialized from user requests, lacking consideration of rich contextual and user-specific information. To address this limitation, we propose a novel AR assistance system, Satori, that models both user states and environmental contexts to deliver proactive guidance. Our system combines the Belief-Desire-Intention (BDI) model with a state-of-the-art multi-modal large language model (LLM) to infer contextually appropriate guidance. The design is informed by two formative studies involving twelve experts. A sixteen within-subject study find that Satori achieves performance comparable to an designer-created Wizard-of-Oz (WoZ) system without relying on manual configurations or heuristics, thereby enhancing generalizability, reusability and opening up new possibilities for AR assistance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.16668v1-abstract-full').style.display = 'none'; document.getElementById('2410.16668v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.14117">arXiv:2410.14117</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.14117">pdf</a>, <a href="https://arxiv.org/format/2410.14117">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> MarineGym: Accelerated Training for Underwater Vehicles with High-Fidelity RL Simulation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chu%2C+S">Shuguang Chu</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+Z">Zebin Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Lin%2C+M">Mingwei Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Dejun Li</a>, <a href="/search/cs?searchtype=author&amp;query=Carlucho%2C+I">Ignacio Carlucho</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.14117v1-abstract-short" style="display: inline;"> Reinforcement Learning (RL) is a promising solution, allowing Unmanned Underwater Vehicles (UUVs) to learn optimal behaviors through trial and error. However, existing simulators lack efficient integration with RL methods, limiting training scalability and performance. This paper introduces MarineGym, a novel simulation framework designed to enhance RL training efficiency for UUVs by utilizing GPU&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.14117v1-abstract-full').style.display = 'inline'; document.getElementById('2410.14117v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.14117v1-abstract-full" style="display: none;"> Reinforcement Learning (RL) is a promising solution, allowing Unmanned Underwater Vehicles (UUVs) to learn optimal behaviors through trial and error. However, existing simulators lack efficient integration with RL methods, limiting training scalability and performance. This paper introduces MarineGym, a novel simulation framework designed to enhance RL training efficiency for UUVs by utilizing GPU acceleration. MarineGym offers a 10,000-fold performance improvement over real-time simulation on a single GPU, enabling rapid training of RL algorithms across multiple underwater tasks. Key features include realistic dynamic modeling of UUVs, parallel environment execution, and compatibility with popular RL frameworks like PyTorch and TorchRL. The framework is validated through four distinct tasks: station-keeping, circle tracking, helical tracking, and lemniscate tracking. This framework sets the stage for advancing RL in underwater robotics and facilitating efficient training in complex, dynamic environments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.14117v1-abstract-full').style.display = 'none'; document.getElementById('2410.14117v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by the 40th Anniversary of the IEEE Conference on Robotics and Automation (ICRA@40)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.13847">arXiv:2410.13847</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.13847">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Adaptive Subsampling and Learned Model Improve Spatiotemporal Resolution of Tactile Skin </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Slepyan%2C+A">Ariel Slepyan</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Dian Li</a>, <a href="/search/cs?searchtype=author&amp;query=Aug%2C+A">Aidan Aug</a>, <a href="/search/cs?searchtype=author&amp;query=Sankar%2C+S">Sriramana Sankar</a>, <a href="/search/cs?searchtype=author&amp;query=Tran%2C+T">Trac Tran</a>, <a href="/search/cs?searchtype=author&amp;query=Thakor%2C+N">Nitish Thakor</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.13847v1-abstract-short" style="display: inline;"> High-speed tactile arrays are essential for real-time robotic control in unstructured environments, but high pixel counts limit readout rates of most large tactile arrays to below 100Hz. We introduce ACTS - adaptive compressive tactile subsampling - a method that efficiently samples tactile matrices and reconstructs interactions using sparse recovery and a learned tactile dictionary. Tested on a 1&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.13847v1-abstract-full').style.display = 'inline'; document.getElementById('2410.13847v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.13847v1-abstract-full" style="display: none;"> High-speed tactile arrays are essential for real-time robotic control in unstructured environments, but high pixel counts limit readout rates of most large tactile arrays to below 100Hz. We introduce ACTS - adaptive compressive tactile subsampling - a method that efficiently samples tactile matrices and reconstructs interactions using sparse recovery and a learned tactile dictionary. Tested on a 1024-pixel sensor array (32x32), ACTS increased frame rates by 18X compared to raster scanning, with minimal error. For the first time in large-area tactile skin, we demonstrate rapid object classification within 20ms of contact, high-speed projectile detection, ricochet angle estimation, and deformation tracking through enhanced spatiotemporal resolution. Our method can be implemented in firmware, upgrading existing low-cost, flexible, and robust tactile arrays into high-resolution systems for large-area spatiotemporal touch sensing. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.13847v1-abstract-full').style.display = 'none'; document.getElementById('2410.13847v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">40 pages, 8 main figures, 12 supplemental figures, Videos can be accessed at https://tinyurl.com/TactileSubsampling</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.12540">arXiv:2410.12540</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.12540">pdf</a>, <a href="https://arxiv.org/format/2410.12540">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> SEMSO: A Secure and Efficient Multi-Data Source Blockchain Oracle </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Xian%2C+Y">Youquan Xian</a>, <a href="/search/cs?searchtype=author&amp;query=Zeng%2C+X">Xueying Zeng</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+C">Chunpei Li</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+P">Peng Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Dongcheng Li</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+P">Peng Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+X">Xianxian Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.12540v1-abstract-short" style="display: inline;"> In recent years, blockchain oracle, as the key link between blockchain and real-world data interaction, has greatly expanded the application scope of blockchain. In particular, the emergence of the Multi-Data Source (MDS) oracle has greatly improved the reliability of the oracle in the case of untrustworthy data sources. However, the current MDS oracle scheme requires nodes to obtain data redundan&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.12540v1-abstract-full').style.display = 'inline'; document.getElementById('2410.12540v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.12540v1-abstract-full" style="display: none;"> In recent years, blockchain oracle, as the key link between blockchain and real-world data interaction, has greatly expanded the application scope of blockchain. In particular, the emergence of the Multi-Data Source (MDS) oracle has greatly improved the reliability of the oracle in the case of untrustworthy data sources. However, the current MDS oracle scheme requires nodes to obtain data redundantly from multiple data sources to guarantee data reliability, which greatly increases the resource overhead and response time of the system. Therefore, in this paper, we propose a Secure and Efficient Multi-data Source Oracle framework (SEMSO), which nodes only need to access one data source to ensure the reliability of final data. First, we design a new off-chain data aggregation protocol TBLS, to guarantee data source diversity and reliability at low cost. Second, according to the rational man assumption, the data source selection task of nodes is modeled and solved based on the Bayesian game under incomplete information to maximize the node&#39;s revenue while improving the success rate of TBLS aggregation and system response speed. Security analysis verifies the reliability of the proposed scheme, and experiments show that under the same environmental assumptions, SEMSO takes into account data diversity while reducing the response time by 23.5\%. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.12540v1-abstract-full').style.display = 'none'; document.getElementById('2410.12540v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Submitted to TPDS</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.12425">arXiv:2410.12425</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.12425">pdf</a>, <a href="https://arxiv.org/format/2410.12425">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Perseus: Leveraging Common Data Patterns with Curriculum Learning for More Robust Graph Neural Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Xia%2C+K">Kaiwen Xia</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+H">Huijun Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Duanyu Li</a>, <a href="/search/cs?searchtype=author&amp;query=Xie%2C+M">Min Xie</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+R">Ruibo Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+W">Wenzhe Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.12425v1-abstract-short" style="display: inline;"> Graph Neural Networks (GNNs) excel at handling graph data but remain vulnerable to adversarial attacks. Existing defense methods typically rely on assumptions like graph sparsity and homophily to either preprocess the graph or guide structure learning. However, preprocessing methods often struggle to accurately distinguish between normal edges and adversarial perturbations, leading to suboptimal r&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.12425v1-abstract-full').style.display = 'inline'; document.getElementById('2410.12425v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.12425v1-abstract-full" style="display: none;"> Graph Neural Networks (GNNs) excel at handling graph data but remain vulnerable to adversarial attacks. Existing defense methods typically rely on assumptions like graph sparsity and homophily to either preprocess the graph or guide structure learning. However, preprocessing methods often struggle to accurately distinguish between normal edges and adversarial perturbations, leading to suboptimal results due to the loss of valuable edge information. Robust graph neural network models train directly on graph data affected by adversarial perturbations, without preprocessing. This can cause the model to get stuck in poor local optima, negatively affecting its performance. To address these challenges, we propose Perseus, a novel adversarial defense method based on curriculum learning. Perseus assesses edge difficulty using global homophily and applies a curriculum learning strategy to adjust the learning order, guiding the model to learn the full graph structure while adaptively focusing on common data patterns. This approach mitigates the impact of adversarial perturbations. Experiments show that models trained with Perseus achieve superior performance and are significantly more robust to adversarial attacks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.12425v1-abstract-full').style.display = 'none'; document.getElementById('2410.12425v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.11315">arXiv:2410.11315</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.11315">pdf</a>, <a href="https://arxiv.org/format/2410.11315">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> SEER: Self-Aligned Evidence Extraction for Retrieval-Augmented Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+X">Xinping Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Dongfang Li</a>, <a href="/search/cs?searchtype=author&amp;query=Zhong%2C+Y">Yan Zhong</a>, <a href="/search/cs?searchtype=author&amp;query=Hu%2C+B">Boren Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Y">Yibin Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Hu%2C+B">Baotian Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+M">Min Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.11315v1-abstract-short" style="display: inline;"> Recent studies in Retrieval-Augmented Generation (RAG) have investigated extracting evidence from retrieved passages to reduce computational costs and enhance the final RAG performance, yet it remains challenging. Existing methods heavily rely on heuristic-based augmentation, encountering several issues: (1) Poor generalization due to hand-crafted context filtering; (2) Semantics deficiency due to&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.11315v1-abstract-full').style.display = 'inline'; document.getElementById('2410.11315v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.11315v1-abstract-full" style="display: none;"> Recent studies in Retrieval-Augmented Generation (RAG) have investigated extracting evidence from retrieved passages to reduce computational costs and enhance the final RAG performance, yet it remains challenging. Existing methods heavily rely on heuristic-based augmentation, encountering several issues: (1) Poor generalization due to hand-crafted context filtering; (2) Semantics deficiency due to rule-based context chunking; (3) Skewed length due to sentence-wise filter learning. To address these issues, we propose a model-based evidence extraction learning framework, SEER, optimizing a vanilla model as an evidence extractor with desired properties through self-aligned learning. Extensive experiments show that our method largely improves the final RAG performance, enhances the faithfulness, helpfulness, and conciseness of the extracted evidence, and reduces the evidence length by 9.25 times. The code will be available at https://github.com/HITsz-TMG/SEER. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.11315v1-abstract-full').style.display = 'none'; document.getElementById('2410.11315v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">15 pages, 6 figures, 5 tables. Accepted by EMNLP 2024 (main)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.10804">arXiv:2410.10804</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.10804">pdf</a>, <a href="https://arxiv.org/format/2410.10804">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> TrajDiffuse: A Conditional Diffusion Model for Environment-Aware Trajectory Prediction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Qingze"> Qingze</a>, <a href="/search/cs?searchtype=author&amp;query=Liu"> Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Danrui Li</a>, <a href="/search/cs?searchtype=author&amp;query=Sohn%2C+S+S">Samuel S. Sohn</a>, <a href="/search/cs?searchtype=author&amp;query=Yoon%2C+S">Sejong Yoon</a>, <a href="/search/cs?searchtype=author&amp;query=Kapadia%2C+M">Mubbasir Kapadia</a>, <a href="/search/cs?searchtype=author&amp;query=Pavlovic%2C+V">Vladimir Pavlovic</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.10804v1-abstract-short" style="display: inline;"> Accurate prediction of human or vehicle trajectories with good diversity that captures their stochastic nature is an essential task for many applications. However, many trajectory prediction models produce unreasonable trajectory samples that focus on improving diversity or accuracy while neglecting other key requirements, such as collision avoidance with the surrounding environment. In this work,&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.10804v1-abstract-full').style.display = 'inline'; document.getElementById('2410.10804v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.10804v1-abstract-full" style="display: none;"> Accurate prediction of human or vehicle trajectories with good diversity that captures their stochastic nature is an essential task for many applications. However, many trajectory prediction models produce unreasonable trajectory samples that focus on improving diversity or accuracy while neglecting other key requirements, such as collision avoidance with the surrounding environment. In this work, we propose TrajDiffuse, a planning-based trajectory prediction method using a novel guided conditional diffusion model. We form the trajectory prediction problem as a denoising impaint task and design a map-based guidance term for the diffusion process. TrajDiffuse is able to generate trajectory predictions that match or exceed the accuracy and diversity of the SOTA, while adhering almost perfectly to environmental constraints. We demonstrate the utility of our model through experiments on the nuScenes and PFSD datasets and provide an extensive benchmark analysis against the SOTA methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.10804v1-abstract-full').style.display = 'none'; document.getElementById('2410.10804v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to be published as inpreceedings of the 2024 International Conference on Pattern Recognition (ICPR)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.10408">arXiv:2410.10408</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.10408">pdf</a>, <a href="https://arxiv.org/format/2410.10408">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Medico: Towards Hallucination Detection and Correction with Multi-source Evidence Fusion </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+X">Xinping Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Yu%2C+J">Jindi Yu</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Z">Zhenyu Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+J">Jifang Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Dongfang Li</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Y">Yibin Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Hu%2C+B">Baotian Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+M">Min Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.10408v1-abstract-short" style="display: inline;"> As we all know, hallucinations prevail in Large Language Models (LLMs), where the generated content is coherent but factually incorrect, which inflicts a heavy blow on the widespread application of LLMs. Previous studies have shown that LLMs could confidently state non-existent facts rather than answering ``I don&#39;t know&#39;&#39;. Therefore, it is necessary to resort to external knowledge to detect and co&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.10408v1-abstract-full').style.display = 'inline'; document.getElementById('2410.10408v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.10408v1-abstract-full" style="display: none;"> As we all know, hallucinations prevail in Large Language Models (LLMs), where the generated content is coherent but factually incorrect, which inflicts a heavy blow on the widespread application of LLMs. Previous studies have shown that LLMs could confidently state non-existent facts rather than answering ``I don&#39;t know&#39;&#39;. Therefore, it is necessary to resort to external knowledge to detect and correct the hallucinated content. Since manual detection and correction of factual errors is labor-intensive, developing an automatic end-to-end hallucination-checking approach is indeed a needful thing. To this end, we present Medico, a Multi-source evidence fusion enhanced hallucination detection and correction framework. It fuses diverse evidence from multiple sources, detects whether the generated content contains factual errors, provides the rationale behind the judgment, and iteratively revises the hallucinated content. Experimental results on evidence retrieval (0.964 HR@5, 0.908 MRR@5), hallucination detection (0.927-0.951 F1), and hallucination correction (0.973-0.979 approval rate) manifest the great potential of Medico. A video demo of Medico can be found at https://youtu.be/RtsO6CSesBI. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.10408v1-abstract-full').style.display = 'none'; document.getElementById('2410.10408v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 3 figures, 6 tables. Accepted by EMNLP 2024&#39;s demo track</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.10370">arXiv:2410.10370</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.10370">pdf</a>, <a href="https://arxiv.org/format/2410.10370">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Innovative Thinking, Infinite Humor: Humor Research of Large Language Models through Structured Thought Leaps </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wang%2C+H">Han Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+Y">Yilin Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Dian Li</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+X">Xiaohan Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+G">Gang Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Lan%2C+X">Xuguang Lan</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+H">Hui Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.10370v1-abstract-short" style="display: inline;"> Humor is a culturally nuanced aspect of human language that presents challenges for understanding and generation, requiring participants to possess good creativity and strong associative thinking. Similar to reasoning tasks like solving math problems, humor generation requires continuous reflection and revision to foster creative thinking, rather than relying on a sudden flash of inspiration like&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.10370v1-abstract-full').style.display = 'inline'; document.getElementById('2410.10370v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.10370v1-abstract-full" style="display: none;"> Humor is a culturally nuanced aspect of human language that presents challenges for understanding and generation, requiring participants to possess good creativity and strong associative thinking. Similar to reasoning tasks like solving math problems, humor generation requires continuous reflection and revision to foster creative thinking, rather than relying on a sudden flash of inspiration like Creative Leap-of-Thought (CLoT) paradigm. Although CLoT can realize the ability of remote association generation, this paradigm fails to generate humor content. Therefore, in this paper, we propose a systematic way of thinking about generating humor and based on it, we built Creative Leap of Structured Thought (CLoST) frame. First, a reward model is necessary achieve the purpose of being able to correct errors, since there is currently no expert model of humor and a usable rule to determine whether a piece of content is humorous. Judgement-oriented instructions are designed to improve the capability of a model, and we also propose an open-domain instruction evolutionary method to fully unleash the potential. Then, through reinforcement learning, the model learns to hone its rationales of the thought chain and refine the strategies it uses. Thus, it learns to recognize and correct its mistakes, and finally generate the most humorous and creative answer. These findings deepen our understanding of the creative capabilities of LLMs and provide ways to enhance LLMs&#39; creative abilities for cross-domain innovative applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.10370v1-abstract-full').style.display = 'none'; document.getElementById('2410.10370v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.10293">arXiv:2410.10293</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.10293">pdf</a>, <a href="https://arxiv.org/format/2410.10293">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> FunnelRAG: A Coarse-to-Fine Progressive Retrieval Paradigm for RAG </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+X">Xinping Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Zhong%2C+Y">Yan Zhong</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+Z">Zetian Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Hu%2C+X">Xinshuo Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Z">Zhenyu Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Dongfang Li</a>, <a href="/search/cs?searchtype=author&amp;query=Hu%2C+B">Baotian Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+M">Min Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.10293v1-abstract-short" style="display: inline;"> Retrieval-Augmented Generation (RAG) prevails in Large Language Models. It mainly consists of retrieval and generation. The retrieval modules (a.k.a. retrievers) aim to find useful information used to facilitate generation modules (a.k.a. generators). As such, generators&#39; performance largely depends on the effectiveness and efficiency of retrievers. However, the retrieval paradigm that we design a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.10293v1-abstract-full').style.display = 'inline'; document.getElementById('2410.10293v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.10293v1-abstract-full" style="display: none;"> Retrieval-Augmented Generation (RAG) prevails in Large Language Models. It mainly consists of retrieval and generation. The retrieval modules (a.k.a. retrievers) aim to find useful information used to facilitate generation modules (a.k.a. generators). As such, generators&#39; performance largely depends on the effectiveness and efficiency of retrievers. However, the retrieval paradigm that we design and use remains flat, which treats the retrieval procedures as a one-off deal with constant granularity. Despite effectiveness, we argue that they suffer from two limitations: (1) flat retrieval exerts a significant burden on one retriever; (2) constant granularity limits the ceiling of retrieval performance. In this work, we propose a progressive retrieval paradigm with coarse-to-fine granularity for RAG, termed FunnelRAG, so as to balance effectiveness and efficiency. Specifically, FunnelRAG establishes a progressive retrieval pipeline by collaborating coarse-to-fine granularity, large-to-small quantity, and low-to-high capacity, which can relieve the burden on one retriever and also promote the ceiling of retrieval performance. Extensive experiments manifest that FunnelRAG achieves comparable retrieval performance while the time overhead is reduced by nearly 40 percent. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.10293v1-abstract-full').style.display = 'none'; document.getElementById('2410.10293v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">18 pages, 6 figures, 13 tables</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Li%2C+D&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Li%2C+D&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Li%2C+D&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Li%2C+D&amp;start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Li%2C+D&amp;start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Li%2C+D&amp;start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">&hellip;</span></li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

Pages: 1 2 3 4 5 6 7 8 9 10