
Search | arXiv e-print repository

<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;50 of 274 results for author: <span class="mathjax">Zheng, T</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&amp;query=Zheng%2C+T">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Zheng, T"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Zheng%2C+T&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Zheng, T"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Zheng%2C+T&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Zheng%2C+T&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Zheng%2C+T&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Zheng%2C+T&amp;start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Zheng%2C+T&amp;start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Zheng%2C+T&amp;start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Zheng%2C+T&amp;start=250" class="pagination-link " aria-label="Page 6" aria-current="page">6 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.01455">arXiv:2412.01455</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.01455">pdf</a>, <a href="https://arxiv.org/format/2412.01455">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Early Exit Is a Natural Capability in Transformer-based Models: An Empirical Study on Early Exit without Joint Optimization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Shan%2C+W">Weiqiao Shan</a>, <a href="/search/cs?searchtype=author&amp;query=Meng%2C+L">Long Meng</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+T">Tong Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Luo%2C+Y">Yingfeng Luo</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+B">Bei Li</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+j">junxin Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Xiao%2C+T">Tong Xiao</a>, <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+J">Jingbo Zhu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.01455v1-abstract-short" style="display: inline;"> Large language models (LLMs) exhibit exceptional performance across various downstream tasks. 
However, they encounter limitations due to slow inference speeds stemming from their extensive parameters. The early exit (EE) is an approach that aims to accelerate auto-regressive decoding. EE generates outputs from intermediate layers instead of using the whole model, which offers a promising solution&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.01455v1-abstract-full').style.display = 'inline'; document.getElementById('2412.01455v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.01455v1-abstract-full" style="display: none;"> Large language models (LLMs) exhibit exceptional performance across various downstream tasks. However, they encounter limitations due to slow inference speeds stemming from their extensive parameters. The early exit (EE) is an approach that aims to accelerate auto-regressive decoding. EE generates outputs from intermediate layers instead of using the whole model, which offers a promising solution to this challenge. However, additional output layers and joint optimization used in conventional EE hinder the application of EE in LLMs. In this paper, we explore the possibility of LLMs EE without additional output layers and joint optimization. Our findings indicate that EE is a natural capability within transformer-based models. While joint optimization does not give model EE capability, it must be employed to address challenges by improving the accuracy of locating the optimal EE layer through gating functions. Additionally, our study reveals patterns in EE behavior from a sub-word perspective based on the LLaMA model and the potential possibility for EE based on sub-layers. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.01455v1-abstract-full').style.display = 'none'; document.getElementById('2412.01455v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. 
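   The exit rule described in the abstract above, emitting a token from an intermediate layer once its prediction looks reliable, can be pictured in a few lines. The sketch below is an illustration only, not the paper's method: it assumes a LLaMA-style Hugging Face causal LM whose final norm and LM head can be reused on intermediate hidden states, and the confidence threshold is an arbitrary choice. A real implementation would also halt the forward pass at the exit layer rather than computing every layer first.

   ```python
   import torch

   def early_exit_next_token(model, input_ids, threshold=0.9):
       """Sketch of confidence-based early exit for one decoding step.

       Assumes a LLaMA-style Hugging Face model (model.model.norm,
       model.lm_head); the 0.9 threshold is illustrative only.
       """
       with torch.no_grad():
           out = model(input_ids, output_hidden_states=True)
       for layer, h in enumerate(out.hidden_states[1:], start=1):
           # Reuse the final norm + LM head on this layer's last position
           logits = model.lm_head(model.model.norm(h[:, -1]))
           conf, token = torch.softmax(logits, dim=-1).max(dim=-1)
           if conf.item() >= threshold:   # confident enough: exit here
               return token.item(), layer
       return token.item(), layer          # no early exit: deepest layer wins
   ```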
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.00580">arXiv:2412.00580</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.00580">pdf</a>, <a href="https://arxiv.org/format/2412.00580">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Continuous Concepts Removal in Text-to-image Diffusion Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Han%2C+T">Tingxu Han</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+W">Weisong Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Hu%2C+Y">Yanrong Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Fang%2C+C">Chunrong Fang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Y">Yonglong Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+S">Shiqing Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+T">Tao Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Z">Zhenyu Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Z">Zhenting Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.00580v1-abstract-short" style="display: inline;"> Text-to-image diffusion models have shown an impressive ability to generate high-quality images from input textual descriptions. However, concerns have been raised about the potential for these models to create content that infringes on copyrights or depicts disturbing subject matter. Removing specific concepts from these models is a promising potential solution to this problem. However, existing&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.00580v1-abstract-full').style.display = 'inline'; document.getElementById('2412.00580v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.00580v1-abstract-full" style="display: none;"> Text-to-image diffusion models have shown an impressive ability to generate high-quality images from input textual descriptions. However, concerns have been raised about the potential for these models to create content that infringes on copyrights or depicts disturbing subject matter. Removing specific concepts from these models is a promising potential solution to this problem. However, existing methods for concept removal do not work well in practical but challenging scenarios where concepts need to be continuously removed. Specifically, these methods lead to poor alignment between the text prompts and the generated image after the continuous removal process. To address this issue, we propose a novel approach called CCRT that includes a designed knowledge distillation paradigm. It constrains the text-image alignment behavior during the continuous concept removal process by using a set of text prompts generated through our genetic algorithm, which employs a designed fuzzing strategy. We conduct extensive experiments involving the removal of various concepts. 
3. arXiv:2411.14679 [pdf, other] (cs.LG: Machine Learning; eess.SY: Systems and Control; stat.ML: Machine Learning)

   Recursive Gaussian Process State Space Model

   Authors: Tengjie Zheng, Lin Cheng, Shengping Gong, Xu Huang

   Abstract: Learning dynamical models from data is not only fundamental but also holds great promise for advancing principle discovery, time-series prediction, and controller design. Among various approaches, Gaussian Process State-Space Models (GPSSMs) have recently gained significant attention due to their combination of flexibility and interpretability. However, for online learning the field lacks an efficient method suitable for scenarios where prior information about the data distribution and model function is limited. To address this issue, this paper proposes a recursive GPSSM method with adaptive capabilities for both operating domains and Gaussian process (GP) hyperparameters. Specifically, we first utilize first-order linearization to derive a Bayesian update equation for the joint distribution of the system state and the GP model, enabling closed-form and domain-independent learning. Second, an online selection algorithm for inducing points is developed based on informative criteria to achieve lightweight learning. Third, to support online hyperparameter optimization, we recover historical measurement information from the current filtering distribution. Comprehensive evaluations on both synthetic and real-world datasets demonstrate the superior accuracy, computational efficiency, and adaptability of our method compared to state-of-the-art online GPSSM techniques.

   Submitted 21 November, 2024; originally announced November 2024.
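   For reference, the canonical first-order linearized Bayesian update mentioned in the abstract is the extended-Kalman-filter measurement update. The paper applies it to the joint state/GP distribution, which is more involved; the standard form, shown here only as background, reads:

   ```python
   import numpy as np

   def linearized_bayes_update(mean, cov, z, h, H, R):
       """Standard first-order (EKF-style) measurement update: linearize
       the observation model h around the current mean, then update the
       Gaussian belief in closed form. H is the Jacobian of h at `mean`;
       R is the measurement noise covariance."""
       S = H @ cov @ H.T + R                # innovation covariance
       K = cov @ H.T @ np.linalg.inv(S)     # Kalman gain
       mean_new = mean + K @ (z - h(mean))  # corrected mean
       cov_new = (np.eye(len(mean)) - K @ H) @ cov
       return mean_new, cov_new
   ```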
4. arXiv:2411.14572 [pdf, other] (cs.LG: Machine Learning; cs.CL: Computation and Language)

   Towards Knowledge Checking in Retrieval-augmented Generation: A Representation Perspective

   Authors: Shenglai Zeng, Jiankun Zhang, Bingheng Li, Yuping Lin, Tianqi Zheng, Dante Everaert, Hanqing Lu, Hui Liu, Hui Liu, Yue Xing, Monica Xiao Cheng, Jiliang Tang

   Abstract: Retrieval-Augmented Generation (RAG) systems have shown promise in enhancing the performance of Large Language Models (LLMs). However, these systems face challenges in effectively integrating external knowledge with the LLM's internal knowledge, often leading to misleading or unhelpful information. This work provides a systematic study of knowledge checking in RAG systems. We conduct a comprehensive analysis of LLM representation behaviors and demonstrate the significance of using representations in knowledge checking. Motivated by these findings, we further develop representation-based classifiers for knowledge filtering. We show substantial improvements in RAG performance, even when dealing with noisy knowledge databases. Our study provides new insights into leveraging LLM representations to enhance the reliability and effectiveness of RAG systems.

   Submitted 21 November, 2024; originally announced November 2024.
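   A "representation-based classifier for knowledge filtering" can be as simple as a linear probe over the LLM's hidden states while it reads a retrieved passage. The sketch below is an illustration under that assumption; the paper's feature choice and classifier are not specified in the abstract, and the pooled hidden-state inputs here are random stand-ins.

   ```python
   import numpy as np
   from sklearn.linear_model import LogisticRegression

   # Hypothetical setup: X holds pooled LLM hidden states for
   # (query, retrieved passage) pairs; y marks each passage as helpful (1)
   # or misleading/unhelpful (0), from a small labeled set.
   X_train = np.random.randn(200, 512)    # stand-in for real hidden states
   y_train = np.random.randint(0, 2, 200)

   probe = LogisticRegression(max_iter=1000).fit(X_train, y_train)

   def keep_passage(hidden_state, threshold=0.5):
       """Filtering step: keep a retrieved passage only if the probe
       predicts it is helpful with sufficient probability."""
       p_helpful = probe.predict_proba(hidden_state.reshape(1, -1))[0, 1]
       return p_helpful >= threshold
   ```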
5. arXiv:2411.13885 [pdf] (cs.RO: Robotics)

   Trajectory Tracking Using Frenet Coordinates with Deep Deterministic Policy Gradient

   Authors: Tongzhou Jiang, Lipeng Liu, Junyue Jiang, Tianyao Zheng, Yuhui Jin, Kunpeng Xu

   Abstract: This paper studies the application of the DDPG algorithm to trajectory-tracking tasks and proposes a trajectory-tracking control method that uses the Frenet coordinate system. By converting the vehicle's position and velocity from Cartesian coordinates to Frenet coordinates, the method can more accurately describe the vehicle's deviation from, and travel distance along, the center line of the road. The DDPG algorithm adopts the Actor-Critic framework, uses deep neural networks for policy and value evaluation, and combines an experience replay mechanism with a target network to improve the algorithm's stability and data efficiency. Experimental results show that the Frenet-based DDPG algorithm performs well in trajectory-tracking tasks in complex environments, achieving high-precision, stable path tracking, and demonstrates its application potential in autonomous driving and intelligent transportation systems.

   Keywords: DDPG; path tracking; robot navigation

   Submitted 21 November, 2024; originally announced November 2024.
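   The Cartesian-to-Frenet conversion this abstract relies on amounts to projecting the vehicle position onto the reference center line: s is the arc length to the projection point and d the signed lateral offset. A minimal sketch for a polyline center line (illustrative, not the paper's code):

   ```python
   import numpy as np

   def cartesian_to_frenet(pos, centerline):
       """Project a 2-D position onto a polyline center line.

       Returns (s, d): arc length along the line to the projection point,
       and the signed lateral deviation (positive to the left of travel)."""
       pts = np.asarray(centerline, dtype=float)
       pos = np.asarray(pos, dtype=float)
       seg = np.diff(pts, axis=0)
       seg_len = np.linalg.norm(seg, axis=1)
       s_cum = np.concatenate([[0.0], np.cumsum(seg_len)])
       best = (np.inf, 0.0, 0.0)
       for i, (a, ab) in enumerate(zip(pts[:-1], seg)):
           t = np.clip(np.dot(pos - a, ab) / np.dot(ab, ab), 0.0, 1.0)
           proj = a + t * ab
           r = pos - proj
           dist = np.linalg.norm(r)
           if dist < best[0]:
               side = np.sign(ab[0] * r[1] - ab[1] * r[0])  # 2-D cross product
               best = (dist, s_cum[i] + t * seg_len[i], (side or 1.0) * dist)
       return best[1], best[2]

   # e.g. a straight road along x: s grows with x, d is the lateral offset
   s, d = cartesian_to_frenet([2.0, 0.5], [[0, 0], [5, 0], [10, 0]])  # s=2.0, d=0.5
   ```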
6. arXiv:2411.06546 [pdf, other] (cs.DS: Data Structures and Algorithms)

   Lower Bounds for Adaptive Relaxation-Based Algorithms for Single-Source Shortest Paths

   Authors: Sunny Atalig, Alexander Hickerson, Arrdya Srivastav, Tingting Zheng, Marek Chrobak

   Abstract: We consider the classical single-source shortest path problem in directed weighted graphs. D. Eppstein recently proved an Ω(n^3) lower bound for oblivious algorithms that use relaxation operations to update the tentative distances from the source vertex. We generalize this result by extending the Ω(n^3) lower bound to adaptive algorithms that, in addition to relaxations, can perform queries involving some simple types of linear inequalities between edge weights and tentative distances. Our model captures as a special case the operations on tentative distances used by Dijkstra's algorithm.

   Submitted 10 November, 2024; originally announced November 2024.
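   For readers outside the area, the "relaxation operation" being counted here is the standard shortest-path primitive (textbook material, not code from the paper):

   ```python
   import math

   def relax(dist, u, v, w):
       """Relax edge (u, v) of weight w: improve v's tentative distance if
       the path through u is shorter. Lower bounds in this model count how
       many such operations any algorithm must perform."""
       if dist[u] + w < dist[v]:
           dist[v] = dist[u] + w
           return True    # the relaxation was productive
       return False

   dist = {"s": 0.0, "a": math.inf}
   relax(dist, "s", "a", 3.0)    # dist["a"] becomes 3.0
   ```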
7. arXiv:2411.04129 [pdf, other] (cs.IR: Information Retrieval; cs.AI: Artificial Intelligence; cs.LG: Machine Learning)

   AmazonQAC: A Large-Scale, Naturalistic Query Autocomplete Dataset

   Authors: Dante Everaert, Rohit Patki, Tianqi Zheng, Christopher Potts

   Abstract: Query Autocomplete (QAC) is a critical feature in modern search engines, facilitating user interaction by predicting search queries based on input prefixes. Despite its widespread adoption, the absence of large-scale, realistic datasets has hindered advances in QAC system development. This paper addresses that gap by introducing AmazonQAC, a new QAC dataset sourced from Amazon Search logs, comprising 395M samples. The dataset includes actual sequences of user-typed prefixes leading to final search terms, as well as session IDs and timestamps that support modeling the context-dependent aspects of QAC. We assess Prefix Trees, semantic retrieval, and Large Language Models (LLMs) with and without finetuning. We find that finetuned LLMs perform best, particularly when incorporating contextual information. However, even our best system achieves only half of what we calculate is theoretically possible on our test data, which implies that QAC is a challenging problem, far from solved by existing systems. This contribution aims to stimulate further research on QAC systems to better serve user needs in diverse environments. We open-source this data on Hugging Face at https://huggingface.co/datasets/amazon/AmazonQAC.

   Submitted 22 October, 2024; originally announced November 2024.

   Comments: EMNLP 2024
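   The Prefix Tree baseline the abstract evaluates is easy to picture: store logged queries in a trie with frequency counts and return the most frequent completions of a typed prefix. A compact sketch (illustrative, not the paper's implementation):

   ```python
   class PrefixTree:
       def __init__(self):
           self.children = {}
           self.count = 0    # times a logged query ended at this node

       def insert(self, query):
           node = self
           for ch in query:
               node = node.children.setdefault(ch, PrefixTree())
           node.count += 1

       def complete(self, prefix, k=5):
           """Return up to k most frequent logged queries extending prefix."""
           node = self
           for ch in prefix:
               if ch not in node.children:
                   return []
               node = node.children[ch]
           found, stack = [], [(node, prefix)]
           while stack:
               n, text = stack.pop()
               if n.count:
                   found.append((n.count, text))
               stack.extend((c, text + ch) for ch, c in n.children.items())
           return [q for _, q in sorted(found, reverse=True)[:k]]

   qac = PrefixTree()
   for q in ["iphone case", "iphone charger", "iphone charger", "ipad"]:
       qac.insert(q)
   print(qac.complete("iph"))   # ['iphone charger', 'iphone case']
   ```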
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">EMNLP 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.03042">arXiv:2411.03042</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.03042">pdf</a>, <a href="https://arxiv.org/format/2411.03042">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Predictor-Corrector Enhanced Transformers with Exponential Moving Average Coefficient Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+B">Bei Li</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+T">Tong Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+R">Rui Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+J">Jiahao Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+Q">Qingyan Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+J">Junliang Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Tan%2C+X">Xu Tan</a>, <a href="/search/cs?searchtype=author&amp;query=Xiao%2C+T">Tong Xiao</a>, <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+J">Jingbo Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+J">Jingang Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Cai%2C+X">Xunliang Cai</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.03042v1-abstract-short" style="display: inline;"> Residual networks, as discrete approximations of Ordinary Differential Equations (ODEs), have inspired significant advancements in neural network design, including multistep methods, high-order methods, and multi-particle dynamical systems. The precision of the solution to ODEs significantly affects parameter optimization, thereby impacting model performance. In this work, we present a series of a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.03042v1-abstract-full').style.display = 'inline'; document.getElementById('2411.03042v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.03042v1-abstract-full" style="display: none;"> Residual networks, as discrete approximations of Ordinary Differential Equations (ODEs), have inspired significant advancements in neural network design, including multistep methods, high-order methods, and multi-particle dynamical systems. The precision of the solution to ODEs significantly affects parameter optimization, thereby impacting model performance. In this work, we present a series of advanced explorations of Transformer architecture design to minimize the error compared to the true ``solution.&#39;&#39; First, we introduce a predictor-corrector learning framework to minimize truncation errors, which consists of a high-order predictor and a multistep corrector. Second, we propose an exponential moving average-based coefficient learning method to strengthen our higher-order predictor. 
9. arXiv:2411.02867 [pdf] (eess.IV: Image and Video Processing; cs.AI: Artificial Intelligence; cs.CV: Computer Vision and Pattern Recognition)

   AtlasSeg: Atlas Prior Guided Dual-U-Net for Cortical Segmentation in Fetal Brain MRI

   Authors: Haoan Xu, Tianshu Zheng, Xinyi Xu, Yao Shen, Jiwei Sun, Cong Sun, Guangbin Wang, Dan Wu

   Abstract: Accurate tissue segmentation in fetal brain MRI remains challenging due to the dynamically changing anatomy and contrast during fetal development. To enhance segmentation accuracy throughout gestation, we introduce AtlasSeg, a dual-U-shape convolution network that incorporates gestational age (GA) specific information as guidance. Provided with a publicly available fetal brain atlas and its segmentation labels at the corresponding GA, AtlasSeg extracts the contextual features of age-specific patterns in the atlas branch and generates tissue segmentations in the segmentation branch. Multi-scale attentive atlas feature fusions are constructed at all stages of encoding and decoding, giving rise to a dual-U-shape network that assists feature flow and information interaction between the two branches. AtlasSeg outperformed six well-known segmentation networks on both our internal fetal brain MRI dataset and the external FeTA dataset. Ablation experiments demonstrate the efficiency of the atlas guidance and the attention mechanism. AtlasSeg showed superior segmentation performance against other convolution networks, with higher segmentation accuracy, and may facilitate fetal brain MRI analysis in large-scale fetal brain studies.

   Submitted 5 November, 2024; originally announced November 2024.
10. arXiv:2411.01157 [pdf, other] (cs.LG: Machine Learning)

   DOI: 10.1016/j.neunet.2024.106846

   Negative-Free Self-Supervised Gaussian Embedding of Graphs

   Authors: Yunhui Liu, Tieke He, Tao Zheng, Jianhua Zhao

   Abstract: Graph Contrastive Learning (GCL) has recently emerged as a promising graph self-supervised learning framework for learning discriminative node representations without labels. The widely adopted objective function of GCL benefits from two key properties: alignment and uniformity, which align representations of positive node pairs while uniformly distributing all representations on the hypersphere. The uniformity property plays a critical role in preventing representation collapse and is achieved by pushing apart augmented views of different nodes (negative pairs). As such, existing GCL methods inherently rely on increasing the quantity and quality of negative samples, resulting in heavy computational demands, memory overhead, and potential class-collision issues. In this study, we propose a negative-free objective to achieve uniformity, inspired by the fact that points distributed according to a normalized isotropic Gaussian are uniformly spread across the unit hypersphere. We can therefore minimize the distance between the distribution of learned representations and the isotropic Gaussian distribution to promote uniformity of node representations. Our method also distinguishes itself from other approaches by eliminating the need for a parameterized mutual information estimator, an additional projector, asymmetric structures, and, crucially, negative samples. Extensive experiments over seven graph benchmarks demonstrate that our proposal achieves competitive performance with fewer parameters, shorter training times, and lower memory consumption than existing GCL methods.

   Submitted 2 November, 2024; originally announced November 2024.

   Comments: Accepted by Neural Networks
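   One simple way to "minimize the distance between the distribution of learned representations and the isotropic Gaussian distribution" is to match the first two moments of an embedding batch: zero mean and identity covariance. This moment-matching loss is an illustrative stand-in; the paper's actual distributional distance is not specified in the abstract.

   ```python
   import torch

   def gaussian_moment_loss(z):
       """Push a batch of embeddings z (n x d) toward an isotropic Gaussian
       by penalizing a nonzero mean and a non-identity covariance (a
       moment-matching stand-in for a full distributional distance)."""
       mu = z.mean(dim=0)
       zc = z - mu
       cov = zc.T @ zc / (z.shape[0] - 1)
       eye = torch.eye(z.shape[1], device=z.device)
       return mu.pow(2).sum() + (cov - eye).pow(2).sum()

   z = torch.randn(256, 64, requires_grad=True)
   loss = gaussian_moment_loss(z)   # shrinks as the batch looks more N(0, I)
   loss.backward()
   ```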
11. arXiv:2410.21716 [pdf, other] (cs.CL: Computation and Language; cs.AI: Artificial Intelligence; stat.AP: Applications)

   A Bayesian Approach to Harnessing the Power of LLMs in Authorship Attribution

   Authors: Zhengmian Hu, Tong Zheng, Heng Huang

   Abstract: Authorship attribution aims to identify the origin or author of a document. Traditional approaches have relied heavily on manual features and fail to capture long-range correlations, limiting their effectiveness. Recent advances leverage text embeddings from pre-trained language models, which require significant fine-tuning on labeled data, posing challenges in data dependency and limited interpretability. Large Language Models (LLMs), with their deep reasoning capabilities and ability to maintain long-range textual associations, offer a promising alternative. This study explores the potential of pre-trained LLMs in one-shot authorship attribution, specifically utilizing Bayesian approaches and the probability outputs of LLMs. Our methodology calculates the probability that a text entails previous writings of an author, reflecting a more nuanced understanding of authorship. Using only pre-trained models such as Llama-3-70B, our results on the IMDb and blog datasets show an impressive 85% accuracy in one-shot authorship classification across ten authors. Our findings set new baselines for one-shot authorship analysis using LLMs and expand the application scope of these models in forensic linguistics. This work also includes extensive ablation studies to validate our approach.

   Submitted 29 October, 2024; originally announced October 2024.
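   The core quantity described above, the probability that a candidate text follows an author's previous writing under a pre-trained LLM, is a sum of token log-probabilities. A minimal sketch with a Hugging Face causal LM; the prompt format and decision rule here are illustrative assumptions, not the paper's exact protocol:

   ```python
   import torch

   def candidate_logprob(model, tokenizer, prior_text, candidate):
       """log p(candidate | author's prior writing) under a causal LM."""
       prior = tokenizer(prior_text, return_tensors="pt").input_ids
       cand = tokenizer(candidate, add_special_tokens=False,
                        return_tensors="pt").input_ids
       ids = torch.cat([prior, cand], dim=1)
       with torch.no_grad():
           logp = model(ids).logits.log_softmax(dim=-1)
       # each candidate token is scored by the position that predicts it
       scores = logp[0, prior.shape[1] - 1 : -1]
       return scores.gather(1, cand[0].unsqueeze(1)).sum().item()

   # Bayesian decision with a uniform prior over authors: attribute the text
   # to argmax over authors of candidate_logprob(model, tok, writings[a], text)
   ```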
12. arXiv:2410.21276 [pdf, other] (cs.CL: Computation and Language; cs.AI; cs.CV; cs.CY; cs.LG; cs.SD; eess.AS)

   GPT-4o System Card

   Authors: OpenAI: Aaron Hurst, Adam Lerer, Adam P. Goucher, Adam Perelman, Aditya Ramesh, Aidan Clark, AJ Ostrow, Akila Welihinda, Alan Hayes, Alec Radford, Aleksander Mądry, Alex Baker-Whitcomb, Alex Beutel, Alex Borzunov, Alex Carney, Alex Chow, Alex Kirillov, Alex Nichol, Alex Paino, Alex Renzin, Alex Tachard Passos, Alexander Kirillov, Alexi Christakis, et al. (395 additional authors not shown)

   Abstract: GPT-4o is an autoregressive omni model that accepts as input any combination of text, audio, image, and video, and generates any combination of text, audio, and image outputs. It is trained end-to-end across text, vision, and audio, meaning all inputs and outputs are processed by the same neural network. GPT-4o can respond to audio inputs in as little as 232 milliseconds, with an average of 320 milliseconds, which is similar to human response time in conversation. It matches GPT-4 Turbo performance on text in English and code, with significant improvement on text in non-English languages, while also being much faster and 50% cheaper in the API. GPT-4o is especially strong at vision and audio understanding compared to existing models. In line with our commitment to building AI safely and consistent with our voluntary commitments to the White House, we are sharing the GPT-4o System Card, which includes our Preparedness Framework evaluations. In this System Card, we provide a detailed look at GPT-4o's capabilities, limitations, and safety evaluations across multiple categories, focusing on speech-to-speech while also evaluating text and image capabilities, and the measures we've implemented to ensure the model is safe and aligned. We also include third-party assessments of dangerous capabilities, as well as a discussion of potential societal impacts of GPT-4o's text and vision capabilities.

   Submitted 25 October, 2024; originally announced October 2024.
AutoKaggle implements an iterative development process that combines code execution, debugging, and compreh&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.20424v3-abstract-full').style.display = 'inline'; document.getElementById('2410.20424v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.20424v3-abstract-full" style="display: none;"> Data science tasks involving tabular data present complex challenges that require sophisticated problem-solving approaches. We propose AutoKaggle, a powerful and user-centric framework that assists data scientists in completing daily data pipelines through a collaborative multi-agent system. AutoKaggle implements an iterative development process that combines code execution, debugging, and comprehensive unit testing to ensure code correctness and logic consistency. The framework offers highly customizable workflows, allowing users to intervene at each phase, thus integrating automated intelligence with human expertise. Our universal data science toolkit, comprising validated functions for data cleaning, feature engineering, and modeling, forms the foundation of this solution, enhancing productivity by streamlining common tasks. We selected 8 Kaggle competitions to simulate data processing workflows in real-world application scenarios. Evaluation results demonstrate that AutoKaggle achieves a validation submission rate of 0.85 and a comprehensive score of 0.82 in typical data science pipelines, fully proving its effectiveness and practicality in handling complex data science tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.20424v3-abstract-full').style.display = 'none'; document.getElementById('2410.20424v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
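<p class="is-size-7">The iterative development process this abstract describes (code execution, debugging, and unit testing in a loop) is easy to picture in code. The sketch below is a hedged illustration only: <code>write_code</code>, <code>run_tests</code>, and <code>debug</code> are hypothetical agent callables, not AutoKaggle's actual API.</p> <pre><code># Hedged sketch of an execute/debug/unit-test loop like the one the
# AutoKaggle abstract describes. All three agent callables are
# placeholder assumptions, not the framework's real interfaces.
def develop_phase(task, write_code, run_tests, debug, max_rounds=5):
    code = write_code(task)            # draft a pipeline step
    for _ in range(max_rounds):
        report = run_tests(code)       # execute the code plus its unit tests
        if report.passed:              # correctness and logic checks pass
            return code
        code = debug(code, report)     # revise using the failure report
    raise RuntimeError("no passing code within the round budget")
</code></pre>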
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">44 pages, 10 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.19766">arXiv:2410.19766</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.19766">pdf</a>, <a href="https://arxiv.org/format/2410.19766">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Large Model for Small Data: Foundation Model for Cross-Modal RF Human Activity Recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Weng%2C+Y">Yuxuan Weng</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+G">Guoquan Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+T">Tianyue Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+Y">Yanbing Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Luo%2C+J">Jun Luo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.19766v1-abstract-short" style="display: inline;"> Radio-Frequency (RF)-based Human Activity Recognition (HAR) rises as a promising solution for applications unamenable to techniques requiring computer visions. However, the scarcity of labeled RF data due to their non-interpretable nature poses a significant obstacle. Thanks to the recent breakthrough of foundation models (FMs), extracting deep semantic insights from unlabeled visual data become v&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.19766v1-abstract-full').style.display = 'inline'; document.getElementById('2410.19766v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.19766v1-abstract-full" style="display: none;"> Radio-Frequency (RF)-based Human Activity Recognition (HAR) rises as a promising solution for applications unamenable to techniques requiring computer visions. However, the scarcity of labeled RF data due to their non-interpretable nature poses a significant obstacle. Thanks to the recent breakthrough of foundation models (FMs), extracting deep semantic insights from unlabeled visual data become viable, yet these vision-based FMs fall short when applied to small RF datasets. To bridge this gap, we introduce FM-Fi, an innovative cross-modal framework engineered to translate the knowledge of vision-based FMs for enhancing RF-based HAR systems. FM-Fi involves a novel cross-modal contrastive knowledge distillation mechanism, enabling an RF encoder to inherit the interpretative power of FMs for achieving zero-shot learning. It also employs the intrinsic capabilities of FM and RF to remove extraneous features for better alignment between the two modalities. The framework is further refined through metric-based few-shot learning techniques, aiming to boost the performance for predefined HAR tasks. 
Comprehensive evaluations indicate that FM-Fi rivals the effectiveness of vision-based methodologies and provide empirical validation of FM-Fi&#39;s generalizability across various environments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.19766v1-abstract-full').style.display = 'none'; document.getElementById('2410.19766v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.13639">arXiv:2410.13639</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.13639">pdf</a>, <a href="https://arxiv.org/format/2410.13639">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> A Comparative Study on Reasoning Patterns of OpenAI&#39;s o1 Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wu%2C+S">Siwei Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Peng%2C+Z">Zhongyuan Peng</a>, <a href="/search/cs?searchtype=author&amp;query=Du%2C+X">Xinrun Du</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+T">Tuney Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+M">Minghao Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+J">Jialong Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+J">Jiachen Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Y">Yizhi Li</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jian Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+W">Wangchunshu Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Lin%2C+Q">Qunshu Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+J">Junbo Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Z">Zhaoxiang Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+W">Wenhao Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+G">Ge Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Lin%2C+C">Chenghua Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+J+H">J. H. Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.13639v2-abstract-short" style="display: inline;"> Enabling Large Language Models (LLMs) to handle a wider range of complex tasks (e.g., coding, math) has drawn great attention from many researchers. As LLMs continue to evolve, merely increasing the number of model parameters yields diminishing performance improvements and heavy computational costs.
Recently, OpenAI&#39;s o1 model has shown that inference strategies (i.e., Test-time Compute methods) c&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.13639v2-abstract-full').style.display = 'inline'; document.getElementById('2410.13639v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.13639v2-abstract-full" style="display: none;"> Enabling Large Language Models (LLMs) to handle a wider range of complex tasks (e.g., coding, math) has drawn great attention from many researchers. As LLMs continue to evolve, merely increasing the number of model parameters yields diminishing performance improvements and heavy computational costs. Recently, OpenAI&#39;s o1 model has shown that inference strategies (i.e., Test-time Compute methods) can also significantly enhance the reasoning capabilities of LLMs. However, the mechanisms behind these methods are still unexplored. In our work, to investigate the reasoning patterns of o1, we compare o1 with existing Test-time Compute methods (BoN, Step-wise BoN, Agent Workflow, and Self-Refine) by using OpenAI&#39;s GPT-4o as a backbone on general reasoning benchmarks in three domains (i.e., math, coding, commonsense reasoning). Specifically, first, our experiments show that the o1 model has achieved the best performance on most datasets. Second, as for the methods of searching diverse responses (e.g., BoN), we find the reward models&#39; capability and the search space both limit the upper boundary of these methods. Third, as for the methods that break the problem into many sub-problems, the Agent Workflow has achieved better performance than Step-wise BoN due to the domain-specific system prompt for planning better reasoning processes. Fourth, it is worth mentioning that we have summarized six reasoning patterns of o1, and provided a detailed analysis on several reasoning benchmarks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.13639v2-abstract-full').style.display = 'none'; document.getElementById('2410.13639v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
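<p class="is-size-7">Of the test-time compute baselines this abstract names (BoN, Step-wise BoN, Agent Workflow, Self-Refine), Best-of-N is the simplest to make concrete. The sketch below is an illustrative assumption, not the authors' code: <code>generate</code> stands in for an LLM sampler and <code>score</code> for a reward model.</p> <pre><code># Minimal Best-of-N (BoN) sampling sketch: draw n candidate answers
# and keep the one the reward model scores highest. Both callables
# are assumed interfaces, not those used in the paper.
def best_of_n(prompt, generate, score, n=8):
    candidates = [generate(prompt) for _ in range(n)]
    return max(candidates, key=lambda answer: score(prompt, answer))
</code></pre> <p class="is-size-7">As the abstract notes, the reward model's quality and the size of the sampled search space jointly cap what this style of method can achieve.</p>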
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.11988">arXiv:2410.11988</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.11988">pdf</a>, <a href="https://arxiv.org/format/2410.11988">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> DISP-LLM: Dimension-Independent Structural Pruning for Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Gao%2C+S">Shangqian Gao</a>, <a href="/search/cs?searchtype=author&amp;query=Lin%2C+C">Chi-Heng Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Hua%2C+T">Ting Hua</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+T">Tang Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Shen%2C+Y">Yilin Shen</a>, <a href="/search/cs?searchtype=author&amp;query=Jin%2C+H">Hongxia Jin</a>, <a href="/search/cs?searchtype=author&amp;query=Hsu%2C+Y">Yen-Chang Hsu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.11988v2-abstract-short" style="display: inline;"> Large Language Models (LLMs) have achieved remarkable success in various natural language processing tasks, including language modeling, understanding, and generation. However, the increased memory and computational costs associated with these models pose significant challenges for deployment on resource-limited devices. Structural pruning has emerged as a promising solution to reduce the costs of&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.11988v2-abstract-full').style.display = 'inline'; document.getElementById('2410.11988v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.11988v2-abstract-full" style="display: none;"> Large Language Models (LLMs) have achieved remarkable success in various natural language processing tasks, including language modeling, understanding, and generation. However, the increased memory and computational costs associated with these models pose significant challenges for deployment on resource-limited devices. Structural pruning has emerged as a promising solution to reduce the costs of LLMs without requiring post-processing steps. Prior structural pruning methods either follow the dependence of structures at the cost of limiting flexibility, or introduce non-trivial additional parameters by incorporating different projection matrices. In this work, we propose a novel approach that relaxes the constraint imposed by regular structural pruning methods and eliminates the structural dependence along the embedding dimension. Our dimension-independent structural pruning method offers several benefits. Firstly, our method enables different blocks to utilize different subsets of the feature maps. Secondly, by removing structural dependence, we facilitate each block to possess varying widths along its input and output dimensions, thereby significantly enhancing the flexibility of structural pruning. 
We evaluate our method on various LLMs, including OPT, LLaMA, LLaMA-2, Phi-1.5, and Phi-2. Experimental results demonstrate that our approach outperforms other state-of-the-art methods, showing for the first time that structural pruning can achieve an accuracy similar to semi-structural pruning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.11988v2-abstract-full').style.display = 'none'; document.getElementById('2410.11988v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by NeurIPS 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.09747">arXiv:2410.09747</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.09747">pdf</a>, <a href="https://arxiv.org/format/2410.09747">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> t-READi: Transformer-Powered Robust and Efficient Multimodal Inference for Autonomous Driving </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Hu%2C+P">Pengfei Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Qian%2C+Y">Yuhang Qian</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+T">Tianyue Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+A">Ang Li</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Z">Zhe Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Gao%2C+Y">Yue Gao</a>, <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+X">Xiuzhen Cheng</a>, <a href="/search/cs?searchtype=author&amp;query=Luo%2C+J">Jun Luo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.09747v3-abstract-short" style="display: inline;"> Given the wide adoption of multimodal sensors (e.g., camera, lidar, radar) by autonomous vehicles (AVs), deep analytics to fuse their outputs for robust perception becomes imperative. However, existing fusion methods often make two assumptions that rarely hold in practice: i) similar data distributions for all inputs and ii) constant availability for all sensors.
For example, lidars have v&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.09747v3-abstract-full').style.display = 'inline'; document.getElementById('2410.09747v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.09747v3-abstract-full" style="display: none;"> Given the wide adoption of multimodal sensors (e.g., camera, lidar, radar) by autonomous vehicles (AVs), deep analytics to fuse their outputs for robust perception becomes imperative. However, existing fusion methods often make two assumptions that rarely hold in practice: i) similar data distributions for all inputs and ii) constant availability for all sensors. For example, lidars have various resolutions and radars may fail; such variability often results in significant performance degradation in fusion. To this end, we present t-READi, an adaptive inference system that accommodates the variability of multimodal sensory data and thus enables robust and efficient perception. t-READi identifies variation-sensitive yet structure-specific model parameters; it then adapts only these parameters while keeping the rest intact. t-READi also leverages a cross-modality contrastive learning method to compensate for the loss from missing modalities. Both functions are implemented to maintain compatibility with existing multimodal deep fusion methods. Extensive experiments demonstrate that, compared with status quo approaches, t-READi not only improves the average inference accuracy by more than 6% but also reduces the inference latency by almost 15x at the cost of only 5% extra memory overhead in the worst case under realistic data and modal variations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.09747v3-abstract-full').style.display = 'none'; document.getElementById('2410.09747v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024.
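<p class="is-size-7">The core idea of adapting only variation-sensitive parameters can be sketched in a few lines. Everything below is an assumption for illustration: the per-parameter sensitivity scores are presumed to come from some offline analysis, and the threshold <code>tau</code> is hypothetical; how t-READi actually scores sensitivity is not shown here.</p> <pre><code># Hedged sketch (PyTorch): freeze a pretrained fusion model and mark
# only parameters whose estimated sensitivity exceeds a threshold as
# trainable, keeping the rest intact.
import torch.nn as nn

def select_adaptive_params(model: nn.Module, sensitivity: dict, tau: float):
    for name, param in model.named_parameters():
        # adapt variation-sensitive parameters; freeze everything else
        param.requires_grad = sensitivity.get(name, 0.0) > tau
</code></pre>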
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages, 16 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.08889">arXiv:2410.08889</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.08889">pdf</a>, <a href="https://arxiv.org/format/2410.08889">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Exploiting Memory-aware Q-distribution Prediction for Nuclear Fusion via Modern Hopfield Network </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Ma%2C+Q">Qingchuan Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+S">Shiao Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+T">Tong Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Dai%2C+X">Xiaodong Dai</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Yifeng Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+Q">Qingquan Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+X">Xiao Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.08889v1-abstract-short" style="display: inline;"> This study addresses the critical challenge of predicting the Q-distribution in long-term stable nuclear fusion task, a key component for advancing clean energy solutions. We introduce an innovative deep learning framework that employs Modern Hopfield Networks to incorporate associative memory from historical shots. Utilizing a newly compiled dataset, we demonstrate the effectiveness of our approa&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.08889v1-abstract-full').style.display = 'inline'; document.getElementById('2410.08889v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.08889v1-abstract-full" style="display: none;"> This study addresses the critical challenge of predicting the Q-distribution in long-term stable nuclear fusion task, a key component for advancing clean energy solutions. We introduce an innovative deep learning framework that employs Modern Hopfield Networks to incorporate associative memory from historical shots. Utilizing a newly compiled dataset, we demonstrate the effectiveness of our approach in enhancing Q-distribution prediction. The proposed method represents a significant advancement by leveraging historical memory information for the first time in this context, showcasing improved prediction accuracy and contributing to the optimization of nuclear fusion research. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.08889v1-abstract-full').style.display = 'none'; document.getElementById('2410.08889v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.06950">arXiv:2410.06950</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.06950">pdf</a>, <a href="https://arxiv.org/format/2410.06950">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Faithful Interpretation for Graph Neural Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Hu%2C+L">Lijie Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+T">Tianhao Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Yu%2C+L">Lu Yu</a>, <a href="/search/cs?searchtype=author&amp;query=Lin%2C+W">Wanyu Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+T">Tianhang Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+D">Di Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.06950v1-abstract-short" style="display: inline;"> Currently, attention mechanisms have garnered increasing attention in Graph Neural Networks (GNNs), such as Graph Attention Networks (GATs) and Graph Transformers (GTs). It is not only due to the commendable boost in performance they offer but also its capacity to provide a more lucid rationale for model behaviors, which are often viewed as inscrutable. However, Attention-based GNNs have demonstra&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.06950v1-abstract-full').style.display = 'inline'; document.getElementById('2410.06950v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.06950v1-abstract-full" style="display: none;"> Currently, attention mechanisms have garnered increasing attention in Graph Neural Networks (GNNs), such as Graph Attention Networks (GATs) and Graph Transformers (GTs). It is not only due to the commendable boost in performance they offer but also its capacity to provide a more lucid rationale for model behaviors, which are often viewed as inscrutable. However, Attention-based GNNs have demonstrated instability in interpretability when subjected to various sources of perturbations during both training and testing phases, including factors like additional edges or nodes. In this paper, we propose a solution to this problem by introducing a novel notion called Faithful Graph Attention-based Interpretation (FGAI). In particular, FGAI has four crucial properties regarding stability and sensitivity to interpretation and final output distribution. Built upon this notion, we propose an efficient methodology for obtaining FGAI, which can be viewed as an ad hoc modification to the canonical Attention-based GNNs. To validate our proposed solution, we introduce two novel metrics tailored for graph interpretation assessment. Experimental results demonstrate that FGAI exhibits superior stability and preserves the interpretability of attention under various forms of perturbations and randomness, which makes FGAI a more faithful and reliable explanation tool. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.06950v1-abstract-full').style.display = 'none'; document.getElementById('2410.06950v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">18 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.03364">arXiv:2410.03364</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.03364">pdf</a>, <a href="https://arxiv.org/format/2410.03364">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Error Correction Code Transformer: From Non-Unified to Unified </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yan%2C+Y">Yongli Yan</a>, <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+J">Jieao Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+T">Tianyue Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=He%2C+J">Jiaqi He</a>, <a href="/search/cs?searchtype=author&amp;query=Dai%2C+L">Linglong Dai</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.03364v1-abstract-short" style="display: inline;"> Channel coding is vital for reliable data transmission in modern wireless systems, and its significance will increase with the emergence of sixth-generation (6G) networks, which will need to support various error correction codes. However, traditional decoders were typically designed as fixed hardware circuits tailored to specific decoding algorithms, leading to inefficiencies and limited flexibil&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.03364v1-abstract-full').style.display = 'inline'; document.getElementById('2410.03364v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.03364v1-abstract-full" style="display: none;"> Channel coding is vital for reliable data transmission in modern wireless systems, and its significance will increase with the emergence of sixth-generation (6G) networks, which will need to support various error correction codes. However, traditional decoders were typically designed as fixed hardware circuits tailored to specific decoding algorithms, leading to inefficiencies and limited flexibility. To address these challenges, this paper proposes a unified, code-agnostic Transformer-based decoding architecture capable of handling multiple linear block codes, including Polar, Low-Density Parity-Check (LDPC), and Bose-Chaudhuri-Hocquenghem (BCH), within a single framework. 
To achieve this, standardized units are employed to harmonize parameters across different code types, while the redesigned unified attention module compresses the structural information of various codewords. Additionally, a sparse mask, derived from the sparsity of the parity-check matrix, is introduced to enhance the model&#39;s ability to capture inherent constraints between information and parity-check bits, resulting in improved decoding accuracy and robustness. Extensive experimental results demonstrate that the proposed unified Transformer-based decoder not only outperforms existing methods but also provides a flexible, efficient, and high-performance solution for next-generation wireless communication systems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.03364v1-abstract-full').style.display = 'none'; document.getElementById('2410.03364v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.00392">arXiv:2410.00392</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.00392">pdf</a>, <a href="https://arxiv.org/format/2410.00392">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> </div> </div> <p class="title is-5 mathjax"> MERIT: Multimodal Wearable Vital Sign Waveform Monitoring </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Tang%2C+Y">Yongyang Tang</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Z">Zhe Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+A">Ang Li</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+T">Tianyue Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Lin%2C+Z">Zheng Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+J">Jia Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Lv%2C+P">Pin Lv</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+Z">Zhe Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Gao%2C+Y">Yue Gao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.00392v3-abstract-short" style="display: inline;"> Cardiovascular disease (CVD) is the leading cause of death and premature mortality worldwide, with occupational environments significantly influencing CVD risk, underscoring the need for effective cardiac monitoring and early warning systems. 
Existing methods of monitoring vital signs require subjects to remain stationary, which is impractical for daily monitoring as individuals are often in motio&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.00392v3-abstract-full').style.display = 'inline'; document.getElementById('2410.00392v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.00392v3-abstract-full" style="display: none;"> Cardiovascular disease (CVD) is the leading cause of death and premature mortality worldwide, with occupational environments significantly influencing CVD risk, underscoring the need for effective cardiac monitoring and early warning systems. Existing methods of monitoring vital signs require subjects to remain stationary, which is impractical for daily monitoring as individuals are often in motion. To address this limitation, we propose MERIT, a multimodality-based wearable system designed for precise ECG waveform monitoring without movement restrictions. Daily activities, involving frequent arm movements, can significantly affect sensor data and complicate the reconstruction of accurate ECG signals. To mitigate motion impact and enhance ECG signal reconstruction, we introduce a deep independent component analysis (Deep-ICA) module and a multimodal fusion module. We conducted experiments with 15 subjects. Our results, compared with commercial wearable devices and existing methods, demonstrate that MERIT accurately reconstructs ECG waveforms during various office activities, offering a reliable solution for fine-grained cardiac monitoring in dynamic environments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.00392v3-abstract-full').style.display = 'none'; document.getElementById('2410.00392v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
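<p class="is-size-7">As rough intuition for the Deep-ICA module mentioned above, classical independent component analysis already illustrates how motion interference can be unmixed from multichannel wearable signals. The snippet below uses scikit-learn's FastICA purely as a stand-in; the paper's module is a learned, deep variant, and the channel layout here is invented.</p> <pre><code># Illustrative only: classical FastICA as a stand-in for Deep-ICA.
# multichannel has shape (n_samples, n_channels); one recovered
# source should carry the ECG-like rhythm, others the motion artifacts.
from sklearn.decomposition import FastICA

def separate_sources(multichannel, n_sources=3):
    ica = FastICA(n_components=n_sources, random_state=0)
    return ica.fit_transform(multichannel)  # (n_samples, n_sources)
</code></pre>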
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 10 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.16311">arXiv:2409.16311</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.16311">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Atmospheric and Oceanic Physics">physics.ao-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Applications">stat.AP</span> </div> </div> <p class="title is-5 mathjax"> New Insights into Global Warming: End-to-End Visual Analysis and Prediction of Temperature Variations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+M">Meihua Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Wan%2C+N">Nan Wan</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+T">Tianlong Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+H">Hanwen Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+L">Li Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+T">Tingting Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.16311v1-abstract-short" style="display: inline;"> Global warming presents an unprecedented challenge to our planet however comprehensive understanding remains hindered by geographical biases temporal limitations and lack of standardization in existing research. An end to end visual analysis of global warming using three distinct temperature datasets is presented. A baseline adjusted from the Paris Agreements one point five degrees Celsius benchma&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.16311v1-abstract-full').style.display = 'inline'; document.getElementById('2409.16311v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.16311v1-abstract-full" style="display: none;"> Global warming presents an unprecedented challenge to our planet however comprehensive understanding remains hindered by geographical biases temporal limitations and lack of standardization in existing research. An end to end visual analysis of global warming using three distinct temperature datasets is presented. A baseline adjusted from the Paris Agreements one point five degrees Celsius benchmark based on data analysis is employed. A closed loop design from visualization to prediction and clustering is created using classic models tailored to the characteristics of the data. This approach reduces complexity and eliminates the need for advanced feature engineering. A lightweight convolutional neural network and long short term memory model specifically designed for global temperature change is proposed achieving exceptional accuracy in long term forecasting with a mean squared error of three times ten to the power of negative six and an R squared value of zero point nine nine nine nine. 
Dynamic time warping and KMeans clustering elucidate national level temperature anomalies and carbon emission patterns. This comprehensive method reveals intricate spatiotemporal characteristics of global temperature variations and provides warming trend attribution. The findings offer new insights into climate change dynamics demonstrating that simplicity and precision can coexist in environmental analysis. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.16311v1-abstract-full').style.display = 'none'; document.getElementById('2409.16311v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">28 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.14968">arXiv:2409.14968</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.14968">pdf</a>, <a href="https://arxiv.org/format/2409.14968">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> Mutation-Based Deep Learning Framework Testing Method in JavaScript Environment </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zou%2C+Y">Yinglong Zou</a>, <a href="/search/cs?searchtype=author&amp;query=Zhai%2C+J">Juan Zhai</a>, <a href="/search/cs?searchtype=author&amp;query=Fang%2C+C">Chunrong Fang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+J">Jiawei Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+T">Tao Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Z">Zhenyu Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.14968v1-abstract-short" style="display: inline;"> In recent years, Deep Learning (DL) applications in JavaScript environment have become increasingly popular. As the infrastructure for DL applications, JavaScript DL frameworks play a crucial role in the development and deployment. It is essential to ensure the quality of JavaScript DL frameworks. However, the bottleneck of limited computational resources in the JavaScript environment brings new c&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.14968v1-abstract-full').style.display = 'inline'; document.getElementById('2409.14968v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.14968v1-abstract-full" style="display: none;"> In recent years, Deep Learning (DL) applications in JavaScript environment have become increasingly popular. As the infrastructure for DL applications, JavaScript DL frameworks play a crucial role in the development and deployment. It is essential to ensure the quality of JavaScript DL frameworks. 
However, the bottleneck of limited computational resources in the JavaScript environment brings new challenges to framework testing. Specifically, JavaScript DL frameworks are equipped with various optimization mechanisms (e.g., cache reuse, inference acceleration) to overcome the bottleneck of limited computational resources. These optimization mechanisms are overlooked by existing methods, resulting in many bugs in JavaScript DL frameworks being missed. To address the above challenges, we propose a mutation-based JavaScript DL framework testing method named DLJSFuzzer. DLJSFuzzer designs 13 tensor mutation rules targeting the cache reuse mechanism to generate test input tensors. In addition, DLJSFuzzer designs eight model mutation rules targeting the inference acceleration mechanism to generate test input models. To evaluate the effectiveness of DLJSFuzzer, we conduct experiments on the most widely used JavaScript DL framework, TensorFlow.js. The experimental results show that DLJSFuzzer outperforms state-of-the-art methods in both effectiveness and efficiency. DLJSFuzzer successfully detects 21 unique crashes and 126 unique NaN &amp; Inconsistency bugs. All detected crashes have been reported to the open-source community, with 12 of them already confirmed by developers. Additionally, DLJSFuzzer improves model generation efficiency by over 47% and bug detection efficiency by over 91% compared to all baselines. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.14968v1-abstract-full').style.display = 'none'; document.getElementById('2409.14968v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.12408">arXiv:2409.12408</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.12408">pdf</a>, <a href="https://arxiv.org/format/2409.12408">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> </div> </div> <p class="title is-5 mathjax"> Mutual Information-based Representations Disentanglement for Unaligned Multimodal Language Sequences </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Qian%2C+F">Fan Qian</a>, <a href="/search/cs?searchtype=author&amp;query=Han%2C+J">Jiqing Han</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jianchen Li</a>, <a href="/search/cs?searchtype=author&amp;query=He%2C+Y">Yongjun He</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+T">Tieran Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+G">Guibin Zheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.12408v1-abstract-short" style="display: inline;"> The key challenge in unaligned multimodal language sequences lies in effectively integrating information from various modalities to obtain a refined multimodal joint representation.
Recently, disentangle-and-fuse methods have achieved promising performance by explicitly learning modality-agnostic and modality-specific representations and then fusing them into a multimodal joint representat&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.12408v1-abstract-full').style.display = 'inline'; document.getElementById('2409.12408v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.12408v1-abstract-full" style="display: none;"> The key challenge in unaligned multimodal language sequences lies in effectively integrating information from various modalities to obtain a refined multimodal joint representation. Recently, disentangle-and-fuse methods have achieved promising performance by explicitly learning modality-agnostic and modality-specific representations and then fusing them into a multimodal joint representation. However, these methods often independently learn modality-agnostic representations for each modality and utilize orthogonal constraints to reduce linear correlations between modality-agnostic and modality-specific representations, neglecting to eliminate their nonlinear correlations. As a result, the obtained multimodal joint representation usually suffers from information redundancy, leading to overfitting and poor generalization of the models. In this paper, we propose a Mutual Information-based Representations Disentanglement (MIRD) method for unaligned multimodal language sequences, in which a novel disentanglement framework is designed to jointly learn a single modality-agnostic representation. In addition, the mutual information minimization constraint is employed to ensure superior disentanglement of representations, thereby eliminating information redundancy within the multimodal joint representation. Furthermore, the challenge of estimating mutual information caused by the limited labeled data is mitigated by introducing unlabeled data. Meanwhile, the unlabeled data also help to characterize the underlying structure of multimodal data, consequently further preventing overfitting and enhancing the performance of the models. Experimental results on several widely used benchmark datasets validate the effectiveness of our proposed approach. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.12408v1-abstract-full').style.display = 'none'; document.getElementById('2409.12408v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024.
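<p class="is-size-7">At a high level, the training objective this abstract describes pairs a task loss with a mutual-information penalty between the shared (modality-agnostic) and modality-specific codes. The sketch below shows only that combination; the MI estimator itself (e.g., a variational upper bound trained alongside the model) is an assumption and is not reproduced here.</p> <pre><code># Conceptual sketch: disentanglement via an MI penalty. The callable
# mi_upper_bound is an assumed estimator of I(shared; private); lam
# weights the penalty against the main task loss.
def disentangled_loss(task_loss, shared, private, mi_upper_bound, lam=0.1):
    return task_loss + lam * mi_upper_bound(shared, private)
</code></pre>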
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">31 pages, 8 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.07641">arXiv:2409.07641</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.07641">pdf</a>, <a href="https://arxiv.org/ps/2409.07641">ps</a>, <a href="https://arxiv.org/format/2409.07641">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> SimulBench: Evaluating Language Models with Creative Simulation Tasks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Jia%2C+Q">Qi Jia</a>, <a href="/search/cs?searchtype=author&amp;query=Yue%2C+X">Xiang Yue</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+T">Tianyu Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+J">Jie Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Lin%2C+B+Y">Bill Yuchen Lin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.07641v1-abstract-short" style="display: inline;"> We introduce SimulBench, a benchmark designed to evaluate large language models (LLMs) across a diverse collection of creative simulation scenarios, such as acting as a Linux terminal or playing text games with users. While these simulation tasks serve as effective measures of an LLM&#39;s general intelligence, they are seldom incorporated into existing benchmarks. A major challenge is to develop an e&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.07641v1-abstract-full').style.display = 'inline'; document.getElementById('2409.07641v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.07641v1-abstract-full" style="display: none;"> We introduce SimulBench, a benchmark designed to evaluate large language models (LLMs) across a diverse collection of creative simulation scenarios, such as acting as a Linux terminal or playing text games with users. While these simulation tasks serve as effective measures of an LLM&#39;s general intelligence, they are seldom incorporated into existing benchmarks. A major challenge is to develop an evaluation framework for testing different LLMs fairly while preserving the multi-round interactive nature of simulation tasks between users and AI. To tackle this issue, we suggest using a fixed LLM as a user agent to engage with an LLM to collect dialogues first under different tasks. Then, challenging dialogue scripts are extracted for evaluating different target LLMs. To facilitate automatic assessment on \DataName{}, GPT-4 is employed as the evaluator, tasked with reviewing the quality of the final response generated by the target LLMs given multi-turn dialogue scripts. Our comprehensive experiments indicate that these simulation tasks continue to pose a significant challenge with their unique natures and show the gap between proprietary models and the most advanced open LLMs. For example, GPT-4-turbo outperforms LLaMA-3-70b-Chat on 18.55\% more cases. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.07641v1-abstract-full').style.display = 'none'; document.getElementById('2409.07641v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Website: https://simulbench.github.io/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.06851">arXiv:2409.06851</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.06851">pdf</a>, <a href="https://arxiv.org/format/2409.06851">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> LIME: Less Is More for MLLM Evaluation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+K">King Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Zang%2C+Q">Qianbo Zang</a>, <a href="/search/cs?searchtype=author&amp;query=Jia%2C+S">Shian Jia</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+S">Siwei Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Fang%2C+F">Feiteng Fang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Y">Yizhi Li</a>, <a href="/search/cs?searchtype=author&amp;query=Gavin%2C+S">Shawn Gavin</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+T">Tuney Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+J">Jiawei Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+B">Bo Li</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+H">Haoning Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Qu%2C+X">Xingwei Qu</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jian Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Z">Zachary Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Yue%2C+X">Xiang Yue</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+J+H">J. H. Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Lin%2C+C">Chenghua Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+M">Min Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Ni%2C+S">Shiwen Ni</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+W">Wenhao Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+G">Ge Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.06851v3-abstract-short" style="display: inline;"> Multimodal Large Language Models (MLLMs) are evaluated on various benchmarks, such as image captioning, visual question answering, and reasoning. However, many of these benchmarks include overly simple or uninformative samples, complicating the effective distinction of different MLLMs&#39; performance. 
arXiv:2409.06851 [pdf, other] · cs.CV, cs.AI
LIME: Less Is More for MLLM Evaluation
Authors: King Zhu, Qianbo Zang, Shian Jia, Siwei Wu, Feiteng Fang, Yizhi Li, Shawn Gavin, Tuney Zheng, Jiawei Guo, Bo Li, Haoning Wu, Xingwei Qu, Jian Yang, Zachary Liu, Xiang Yue, J. H. Liu, Chenghua Lin, Min Yang, Shiwen Ni, Wenhao Huang, Ge Zhang
Abstract: Multimodal Large Language Models (MLLMs) are evaluated on various benchmarks, such as image captioning, visual question answering, and reasoning. However, many of these benchmarks include overly simple or uninformative samples, complicating the effective distinction of different MLLMs' performance. Furthermore, evaluating models across numerous benchmarks incurs a significant computational burden. To address these issues, we propose LIME (Less Is More for MLLM Evaluation), a refined and efficient benchmark curated through a semi-automated pipeline. This pipeline filters out uninformative samples and eliminates answer leakage by focusing on tasks that necessitate image-based understanding. Our experiments indicate that LIME reduces the number of samples by 76% and evaluation time by 77%, while also providing a more effective means of distinguishing the capabilities of different models. Notably, we find that traditional automatic metrics, such as CIDEr, are inadequate for assessing MLLMs' captioning performance; excluding the caption task score yields a more accurate reflection of overall model performance. All code and data are available at https://github.com/kangreen0210/LIME.
Submitted 13 October, 2024; v1 submitted 10 September, 2024; originally announced September 2024.
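The two filters the LIME abstract implies (answer leakage and uninformative samples) can be sketched as follows; the helper callables and the 0.9 threshold are assumptions for illustration, not the paper's pipeline:

```python
# Sketch of two curation filters: drop samples a text-only model can answer
# (answer leakage) and samples nearly every model already gets right.

def filter_benchmark(samples, models, text_only_answer, vqa_answer,
                     easy_threshold=0.9):
    kept = []
    for s in samples:
        # Answer leakage: solvable without looking at the image.
        if text_only_answer(s["question"]) == s["answer"]:
            continue
        # Uninformative: almost all models answer it correctly anyway.
        acc = sum(vqa_answer(m, s) == s["answer"] for m in models) / len(models)
        if acc >= easy_threshold:
            continue
        kept.append(s)
    return kept
```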
arXiv:2409.02813 [pdf, other] · cs.CL, cs.CV
MMMU-Pro: A More Robust Multi-discipline Multimodal Understanding Benchmark
Authors: Xiang Yue, Tianyu Zheng, Yuansheng Ni, Yubo Wang, Kai Zhang, Shengbang Tong, Yuxuan Sun, Botao Yu, Ge Zhang, Huan Sun, Yu Su, Wenhu Chen, Graham Neubig
Abstract: This paper introduces MMMU-Pro, a robust version of the Massive Multi-discipline Multimodal Understanding and Reasoning (MMMU) benchmark. MMMU-Pro rigorously assesses multimodal models' true understanding and reasoning capabilities through a three-step process based on MMMU: (1) filtering out questions answerable by text-only models, (2) augmenting candidate options, and (3) introducing a vision-only input setting where questions are embedded within images. This setting challenges AI to truly "see" and "read" simultaneously, testing a fundamental human cognitive skill of seamlessly integrating visual and textual information. Results show that model performance is substantially lower on MMMU-Pro than on MMMU, ranging from 16.8% to 26.9% across models. We explore the impact of OCR prompts and Chain of Thought (CoT) reasoning, finding that OCR prompts have minimal effect while CoT generally improves performance. MMMU-Pro provides a more rigorous evaluation tool, closely mimicking real-world scenarios and offering valuable directions for future research in multimodal AI.
Submitted 10 September, 2024; v1 submitted 4 September, 2024; originally announced September 2024.

arXiv:2408.15585 [pdf, other] · cs.SD, eess.AS
Whisper-PMFA: Partial Multi-Scale Feature Aggregation for Speaker Verification using Whisper Models
Authors: Yiyang Zhao, Shuai Wang, Guangzhi Sun, Zehua Chen, Chao Zhang, Mingxing Xu, Thomas Fang Zheng
Abstract: In this paper, Whisper, a large-scale pre-trained model for automatic speech recognition, is applied to speaker verification. A partial multi-scale feature aggregation (PMFA) approach is proposed based on a subset of Whisper encoder blocks to derive highly discriminative speaker embeddings. Experimental results demonstrate that using the middle-to-later blocks of the Whisper encoder retains more speaker information. On the VoxCeleb1 and CN-Celeb1 datasets, our system achieves 1.42% and 8.23% equal error rates (EERs) respectively, receiving 0.58% and 1.81% absolute EER reductions over the ECAPA-TDNN baseline, and 0.46% and 0.97% over the ResNet34 baseline. Furthermore, our results indicate that using Whisper models trained on multilingual data can effectively enhance the model's robustness across languages. Finally, the low-rank adaptation approach is evaluated, which reduces the trainable model parameters by approximately 45 times while only slightly increasing the EER by 0.2%.
Submitted 28 August, 2024; originally announced August 2024.
Comments: Accepted by Interspeech 2024
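A hedged sketch of what partial multi-scale feature aggregation could look like in PyTorch; the encoder-block interface and the pooling choice are assumptions, not Whisper's actual API or the paper's implementation:

```python
import torch
import torch.nn as nn

class PMFA(nn.Module):
    """Aggregate hidden states from a chosen subset of encoder blocks
    (middle-to-later ones work best per the abstract) into one embedding.
    Assumes each block maps (batch, frames, dim) -> (batch, frames, dim)."""

    def __init__(self, blocks: nn.ModuleList, use_blocks: range, dim: int):
        super().__init__()
        self.blocks = blocks
        self.use_blocks = set(use_blocks)
        self.proj = nn.Linear(dim * len(self.use_blocks), 256)  # speaker embedding

    def forward(self, x):
        feats = []
        for i, block in enumerate(self.blocks):
            x = block(x)
            if i in self.use_blocks:
                feats.append(x.mean(dim=1))    # temporal pooling per block
        return self.proj(torch.cat(feats, dim=-1))
```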
arXiv:2408.13996 [pdf] · cs.NE
Research Advances and New Paradigms for Biology-inspired Spiking Neural Networks
Authors: Tianyu Zheng, Liyuan Han, Tielin Zhang
Abstract: Spiking neural networks (SNNs) are gaining popularity in the computational simulation and artificial intelligence fields owing to their biological plausibility and computational efficiency. This paper explores the historical development of SNNs and concludes that these two fields are intersecting and merging rapidly. Following the successful application of Dynamic Vision Sensors (DVS) and Dynamic Audio Sensors (DAS), SNNs have found suitable paradigms, such as continuous visual signal tracking, automatic speech recognition, and reinforcement learning for continuous control, that have extensively supported their key features, including spike encoding, neuronal heterogeneity, specific functional circuits, and multiscale plasticity. Compared to these real-world paradigms, the brain contains a spiking version of the biology-world paradigm, which exhibits a similar level of complexity and is usually considered a mirror of the real world. Considering the projected rapid development of invasive and parallel Brain-Computer Interfaces (BCIs), as well as new BCI-based paradigms that include online pattern recognition and stimulus control of biological spike trains, SNNs naturally leverage their advantages in energy efficiency, robustness, and flexibility. The biological brain has inspired the present study of SNNs and effective SNN machine-learning algorithms, which can in turn help enhance neuroscience discoveries by applying them to the new BCI paradigm. Such two-way interactions with positive feedback can accelerate brain science research and brain-inspired intelligence technology.
Submitted 28 August, 2024; v1 submitted 25 August, 2024; originally announced August 2024.

arXiv:2408.12817 [pdf, other] · cs.LG, physics.chem-ph
Data-Driven Parametrization of Molecular Mechanics Force Fields for Expansive Chemical Space Coverage
Authors: Tianze Zheng, Ailun Wang, Xu Han, Yu Xia, Xingyuan Xu, Jiawei Zhan, Yu Liu, Yang Chen, Zhi Wang, Xiaojie Wu, Sheng Gong, Wen Yan
Abstract: A force field is a critical component in molecular dynamics simulations for computational drug discovery. It must achieve high accuracy within the constraints of the limited functional forms of molecular mechanics (MM), which offer high computational efficiency. With the rapid expansion of synthetically accessible chemical space, traditional look-up table approaches face significant challenges. In this study, we address this issue using a modern data-driven approach, developing ByteFF, an Amber-compatible force field for drug-like molecules. To create ByteFF, we generated an expansive and highly diverse molecular dataset at the B3LYP-D3(BJ)/DZVP level of theory. This dataset includes 2.4 million optimized molecular fragment geometries with analytical Hessian matrices, along with 3.2 million torsion profiles. We then trained an edge-augmented, symmetry-preserving molecular graph neural network (GNN) on this dataset, employing a carefully optimized training strategy. Our model simultaneously predicts all bonded and non-bonded MM force field parameters for drug-like molecules across a broad chemical space. ByteFF demonstrates state-of-the-art performance on various benchmark datasets, excelling in predicting relaxed geometries, torsional energy profiles, and conformational energies and forces. Its exceptional accuracy and expansive chemical space coverage make ByteFF a valuable tool for multiple stages of computational drug discovery.
Submitted 8 October, 2024; v1 submitted 22 August, 2024; originally announced August 2024.
Comments: ByteFF, a machine-learning-parametrized MMFF. Code available at https://github.com/bytedance/byteff

arXiv:2408.11834 [pdf, other] · cs.CV, cs.AI
SCREENER: A general framework for task-specific experiment design in quantitative MRI
Authors: Tianshu Zheng, Zican Wang, Timothy Bray, Daniel C. Alexander, Dan Wu, Hui Zhang
Abstract: Quantitative magnetic resonance imaging (qMRI) is increasingly investigated for use in a variety of clinical tasks, from diagnosis, through staging, to treatment monitoring. However, experiment design in qMRI, i.e., the identification of optimal acquisition protocols, has focused on obtaining the most precise parameter estimates, with no regard for the specific requirements of downstream tasks. Here we propose SCREENER, a general framework for task-specific experiment design in quantitative MRI. SCREENER incorporates a task-specific objective and seeks the optimal protocol with a deep-reinforcement-learning (DRL) based optimization strategy. To illustrate this framework, we employ the task of classifying the inflammation status of bone marrow using diffusion MRI data with intravoxel incoherent motion (IVIM) modelling. Results demonstrate that SCREENER outperforms previous ad hoc and optimized protocols under clinical signal-to-noise ratio (SNR) conditions, achieving significant improvements both in binary classification tasks, e.g. from 67% to 89%, and in a multi-class classification task, from 46% to 59%. Additionally, we show this improvement is robust to the SNR. Lastly, we demonstrate the advantage of the DRL-based optimization strategy, enabling zero-shot discovery of near-optimal protocols for a range of SNRs not used in training. In conclusion, SCREENER has the potential to enable wider uptake of qMRI in the clinic.
Submitted 6 August, 2024; originally announced August 2024.

arXiv:2408.11562 [pdf, other] · cs.SD, eess.AS · DOI: 10.21437/Interspeech.2024-700
A Joint Noise Disentanglement and Adversarial Training Framework for Robust Speaker Verification
Authors: Xujiang Xing, Mingxing Xu, Thomas Fang Zheng
Abstract: Automatic Speaker Verification (ASV) suffers from performance degradation in noisy conditions. To address this issue, we propose a novel adversarial learning framework that incorporates noise disentanglement to establish a noise-independent, speaker-invariant embedding space. Specifically, the disentanglement module includes two encoders for separating speaker-related and speaker-irrelevant information, respectively. The reconstruction module serves as a regularization term to constrain the noise. A feature-robust loss is also used to supervise the speaker encoder to learn noise-independent speaker embeddings without losing speaker information. In addition, adversarial training is introduced to discourage the speaker encoder from encoding acoustic condition information, achieving a speaker-invariant embedding space. Experiments on VoxCeleb1 indicate that the proposed method improves the performance of the speaker verification system under both clean and noisy conditions.
Submitted 22 August, 2024; v1 submitted 21 August, 2024; originally announced August 2024.
Comments: 5 pages, accepted by Interspeech 2024
Report number: 707-711
Journal ref: Interspeech 2024

arXiv:2408.09851 [pdf, other] · cs.NI, eess.SY
ISAC-Fi: Enabling Full-fledged Monostatic Sensing over Wi-Fi Communication
Authors: Zhe Chen, Chao Hu, Tianyue Zheng, Hangcheng Cao, Yanbing Yang, Yen Chu, Hongbo Jiang, Jun Luo
Abstract: Whereas Wi-Fi communications have been exploited for sensing purposes for over a decade, the bistatic or multistatic nature of Wi-Fi still poses multiple challenges, hampering real-life deployment of integrated sensing and communication (ISAC) within the Wi-Fi framework. In this paper, we aim to re-design Wi-Fi so that monostatic sensing (mimicking radar) can be achieved over the multistatic communication infrastructure. Specifically, we propose, design, and implement ISAC-Fi as an ISAC-ready Wi-Fi prototype. We first present a novel self-interference cancellation scheme in order to extract reflected (radio frequency) signals for sensing purposes in the face of ongoing transmissions. We then subtly revise the existing Wi-Fi framework so as to seamlessly operate monostatic sensing under the Wi-Fi communication standard. Finally, we offer two ISAC-Fi designs: while a USRP-based one emulates a totally re-designed ISAC-Fi device, another plug-and-play design allows for backward compatibility by attaching an extra module to an arbitrary Wi-Fi device. We perform extensive experiments to validate the efficacy of ISAC-Fi and also to demonstrate its superiority over existing Wi-Fi sensing proposals.
Submitted 19 August, 2024; originally announced August 2024.
Comments: 14 pages, 22 figures

arXiv:2408.08072 [pdf, other] · cs.CL
I-SHEEP: Self-Alignment of LLM from Scratch through an Iterative Self-Enhancement Paradigm
Authors: Yiming Liang, Ge Zhang, Xingwei Qu, Tianyu Zheng, Jiawei Guo, Xinrun Du, Zhenzhu Yang, Jiaheng Liu, Chenghua Lin, Lei Ma, Wenhao Huang, Jiajun Zhang
Abstract: Large Language Models (LLMs) have achieved significant advancements; however, the common learning paradigm treats LLMs as passive information repositories, neglecting their potential for active learning and alignment. Some approaches train LLMs using their own generated synthetic data, exploring the possibility of active alignment. However, there is still a huge gap between these one-time alignment methods and the continuous automatic alignment of humans. In this paper, we introduce I-SHEEP, an Iterative Self-EnHancEmEnt Paradigm. This human-like paradigm enables LLMs to continuously self-align from scratch with nothing. Compared to the one-time alignment method Dromedary (Sun et al., 2023), which refers to the first iteration in this paper, I-SHEEP can significantly enhance capacities on both Qwen and Llama models. I-SHEEP achieves a maximum relative improvement of 78.2% in Alpaca Eval, 24.0% in MT Bench, and an absolute increase of 8.88% in IFEval accuracy over subsequent iterations on the Qwen-1.5 72B model. Additionally, I-SHEEP surpasses the base model in various standard benchmark generation tasks, achieving an average improvement of 24.77% in code generation tasks, 12.04% in TriviaQA, and 20.29% in SQuAD. We also provide new insights based on the experiment results. Our codes, datasets, and models are available at https://anonymous.4open.science/r/I-SHEEP.
Submitted 27 August, 2024; v1 submitted 15 August, 2024; originally announced August 2024.
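The iterative self-enhancement idea reads naturally as a generate-assess-filter-finetune cycle. A skeleton under that assumption; every method on `model` here is a placeholder, not the paper's code:

```python
# Skeleton of an iterative self-alignment loop as the abstract describes it.
# `generate`, `self_assess`, and `finetune` are hypothetical interfaces.

def i_sheep_loop(model, seed_prompts, n_rounds=3, keep_score=0.8):
    for _ in range(n_rounds):
        pairs = [(p, model.generate(p)) for p in seed_prompts]        # self-generate
        scored = [(p, r, model.self_assess(p, r)) for p, r in pairs]  # self-assess
        keep = [(p, r) for p, r, s in scored if s >= keep_score]      # self-filter
        model = model.finetune(keep)                                  # self-align
    return model
```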
arXiv:2408.06911 [pdf, other] · eess.AS, cs.AI
Heterogeneous Space Fusion and Dual-Dimension Attention: A New Paradigm for Speech Enhancement
Authors: Tao Zheng, Liejun Wang, Yinfeng Yu
Abstract: Self-supervised learning has demonstrated impressive performance in speech tasks, yet there remains ample opportunity for advancement in the realm of speech enhancement research. In addressing speech tasks, confining the attention mechanism solely to the temporal dimension limits its ability to focus on critical speech features. Considering these issues, our study introduces a novel speech enhancement framework, HFSDA, which skillfully integrates heterogeneous spatial features and incorporates a dual-dimension attention mechanism to significantly enhance speech clarity and quality in noisy environments. By leveraging self-supervised learning embeddings in tandem with Short-Time Fourier Transform (STFT) spectrogram features, our model excels at capturing both high-level semantic information and detailed spectral data, enabling a more thorough analysis and refinement of speech signals. Furthermore, we employ the innovative Omni-dimensional Dynamic Convolution (ODConv) technology within the spectrogram input branch, enabling enhanced extraction and integration of crucial information across multiple dimensions. Additionally, we refine the Conformer model by enhancing its feature extraction capabilities not only in the temporal dimension but also across the spectral domain. Extensive experiments on the VCTK-DEMAND dataset show that HFSDA is comparable to existing state-of-the-art models, confirming the validity of our approach.
Submitted 13 August, 2024; originally announced August 2024.
Comments: Accepted for publication by IEEE International Conference on Systems, Man, and Cybernetics 2024

arXiv:2408.06185 [pdf, other] · eess.SY, cs.CY, cs.GT, cs.NI
Hi-SAM: A high-scalable authentication model for satellite-ground Zero-Trust system using mean field game
Authors: Xuesong Wu, Tianshuai Zheng, Runfang Wu, Jie Ren, Junyan Guo, Ye Du
Abstract: As more and more Internet of Things (IoT) devices are connected to satellite networks, the Zero-Trust Architecture brings dynamic security to the satellite-ground system, while frequent authentication creates challenges for system availability. To enable the system to accommodate more IoT devices, this paper proposes a high-scalable authentication model (Hi-SAM). Hi-SAM introduces the Proof-of-Work idea to authentication, which allows devices to obtain network resources based on their authentication frequency. To optimize this frequency, a mean field game is used to model the competition among devices, which can reduce the decision space of large-scale population games. A dynamic time-range message authentication code is also designed for security. In tests at large population scales, Hi-SAM proves superior in the optimization of authentication workload and in anomaly detection efficiency.
Submitted 12 August, 2024; originally announced August 2024.

arXiv:2408.05765 [pdf, other] · cs.LG, stat.ML
Scalable and Adaptive Spectral Embedding for Attributed Graph Clustering
Authors: Yunhui Liu, Tieke He, Qing Wu, Tao Zheng, Jianhua Zhao
Abstract: Attributed graph clustering, which aims to group the nodes of an attributed graph into disjoint clusters, has made promising advancements in recent years. However, most existing methods face challenges when applied to large graphs due to the expensive computational cost and high memory usage. In this paper, we introduce Scalable and Adaptive Spectral Embedding (SASE), a simple attributed graph clustering method devoid of parameter learning. SASE comprises three main components: node feature smoothing via k-order simple graph convolution, scalable spectral clustering using random Fourier features, and adaptive order selection. With these designs, SASE not only effectively captures global cluster structures but also exhibits linear time and space complexity relative to the graph size. Empirical results demonstrate the superiority of SASE. For example, on the ArXiv dataset with 169K nodes and 1.17M edges, SASE achieves a 6.9% improvement in ACC and a 5.87× speedup compared to the runner-up, S3GC.
Submitted 11 August, 2024; originally announced August 2024.
Comments: Accepted by CIKM 2024 (Short Paper)
arXiv:2408.05087 [pdf, other] · cs.LG
Bootstrap Latents of Nodes and Neighbors for Graph Self-Supervised Learning
Authors: Yunhui Liu, Huaisong Zhang, Tieke He, Tao Zheng, Jianhua Zhao
Abstract: Contrastive learning is a significant paradigm in graph self-supervised learning. However, it requires negative samples to prevent model collapse and learn discriminative representations. These negative samples inevitably lead to heavy computation, memory overhead and class collision, compromising the representation learning. Recent studies show that methods obviating negative samples can attain competitive performance and scalability enhancements, exemplified by bootstrapped graph latents (BGRL). However, BGRL neglects the inherent graph homophily, which provides valuable insights into underlying positive pairs. Our motivation arises from the observation that subtly introducing a few ground-truth positive pairs significantly improves BGRL. Although we cannot obtain ground-truth positive pairs without labels under the self-supervised setting, edges in the graph can reflect noisy positive pairs, i.e., neighboring nodes often share the same label. Therefore, we propose to expand the positive pair set with node-neighbor pairs. Subsequently, we introduce a cross-attention module to predict the supportiveness score of a neighbor with respect to the anchor node. This score quantifies the positive support from each neighboring node and is encoded into the training objective. Consequently, our method mitigates class collision from negative and noisy positive samples, concurrently enhancing intra-class compactness. Extensive experiments are conducted on five benchmark datasets and three downstream tasks: node classification, node clustering, and node similarity search. The results demonstrate that our method generates node representations with enhanced intra-class compactness and achieves state-of-the-art performance.
Submitted 9 August, 2024; originally announced August 2024.
Comments: Accepted by ECML PKDD 2024

arXiv:2408.03979 [pdf, ps, other] · cs.SD, eess.AS
Speaker Adaptation for Quantised End-to-End ASR Models
Authors: Qiuming Zhao, Guangzhi Sun, Chao Zhang, Mingxing Xu, Thomas Fang Zheng
Abstract: End-to-end models have shown superior performance for automatic speech recognition (ASR). However, such models are often very large in size and thus challenging to deploy on resource-constrained edge devices. While quantisation can reduce model sizes, it can lead to increased word error rates (WERs). Although improved quantisation methods have been proposed to address the issue of performance degradation, the fact that quantised models deployed on edge devices often target only a small group of users remains under-explored. To this end, we propose personalisation for quantised models (P4Q), a novel strategy that uses speaker adaptation (SA) to improve quantised end-to-end ASR models by fitting them to the characteristics of the target speakers. In this paper, we study the P4Q strategy based on Whisper and Conformer attention-based encoder-decoder (AED) end-to-end ASR models, leveraging a 4-bit block-wise NormalFloat4 (NF4) approach for quantisation and the low-rank adaptation (LoRA) approach for SA. Experimental results on the LibriSpeech and TED-LIUM 3 corpora show that, with a 7-fold reduction in model size and 1% extra speaker-specific parameters, 15.1% and 23.3% relative WER reductions were achieved on quantised Whisper and Conformer AED models respectively, compared to the full-precision models.
Submitted 7 August, 2024; originally announced August 2024.
Comments: submitted to ASRU 2023 Workshop
arXiv:2408.02814 (https://arxiv.org/abs/2408.02814) [pdf, other] cs.LG, cs.CR
Pre-trained Encoder Inference: Revealing Upstream Encoders In Downstream Machine Learning Services
Authors: Shaopeng Fu, Xuexue Sun, Ke Qing, Tianhang Zheng, Di Wang
Abstract: Though pre-trained encoders can be easily accessed online to quickly build downstream machine learning (ML) services, various attacks have been designed to compromise the security and privacy of these encoders. While most attacks target encoders on the upstream side, it remains unknown how an encoder could be threatened when deployed in a downstream ML service. This paper unveils a new vulnerability: the Pre-trained Encoder Inference (PEI) attack, which poses privacy threats to encoders hidden behind downstream ML services. Given only API access to a targeted downstream service and a set of candidate encoders, the PEI attack can infer which candidate encoder is secretly used by the targeted service. We evaluate the attack performance of PEI against real-world encoders on three downstream tasks: image classification, text classification, and text-to-image generation. Experiments show that the PEI attack succeeds in revealing the hidden encoder in most cases and seldom makes mistakes even when the hidden encoder is not in the candidate set. We also conduct a case study on one of the most recent vision-language models, LLaVA, to illustrate that the PEI attack is useful in assisting other ML attacks such as adversarial attacks. The code is available at https://github.com/fshp971/encoder-inference.
Submitted 5 August, 2024; originally announced August 2024.
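A toy reconstruction of the PEI idea: score each candidate encoder by how well a shadow nearest-neighbour classifier built on that encoder agrees with the black-box service. Here query_service, candidates, and probes are hypothetical stand-ins, and the scoring rule is an assumption rather than the authors' algorithm.

```python
import numpy as np

def infer_hidden_encoder(query_service, candidates, probes, threshold=0.8):
    """Rank candidate encoders by behavioural agreement with the service.

    query_service: black-box API, probe -> label        (hypothetical)
    candidates:    {name: encoder_fn} probe -> embedding (hypothetical)
    probes:        inputs on which the candidates disagree with each other
    """
    service_out = [query_service(p) for p in probes]
    scores = {}
    for name, enc in candidates.items():
        # If the service uses this encoder, embedding neighbours should
        # tend to receive the same service label.
        emb = np.stack([enc(p) for p in probes])
        d = np.linalg.norm(emb[:, None] - emb[None, :], axis=-1)
        np.fill_diagonal(d, np.inf)
        nn_lbl = [service_out[j] for j in d.argmin(axis=1)]
        scores[name] = float(np.mean([a == b for a, b in zip(service_out, nn_lbl)]))
    best = max(scores, key=scores.get)
    # Declare "not in candidate set" when even the best agreement is weak.
    return best if scores[best] >= threshold else None
```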
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.02559">arXiv:2408.02559</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.02559">pdf</a>, <a href="https://arxiv.org/format/2408.02559">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Evaluating and Enhancing LLMs Agent based on Theory of Mind in Guandan: A Multi-Player Cooperative Game under Imperfect Information </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yim%2C+Y">Yauwai Yim</a>, <a href="/search/cs?searchtype=author&amp;query=Chan%2C+C">Chunkit Chan</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+T">Tianyu Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Deng%2C+Z">Zheye Deng</a>, <a href="/search/cs?searchtype=author&amp;query=Fan%2C+W">Wei Fan</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+T">Tianshi Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+Y">Yangqiu Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.02559v1-abstract-short" style="display: inline;"> Large language models (LLMs) have shown success in handling simple games with imperfect information and enabling multi-agent coordination, but their ability to facilitate practical collaboration against other agents in complex, imperfect information environments, especially in a non-English environment, still needs to be explored. This study investigates the applicability of knowledge acquired by&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.02559v1-abstract-full').style.display = 'inline'; document.getElementById('2408.02559v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.02559v1-abstract-full" style="display: none;"> Large language models (LLMs) have shown success in handling simple games with imperfect information and enabling multi-agent coordination, but their ability to facilitate practical collaboration against other agents in complex, imperfect information environments, especially in a non-English environment, still needs to be explored. This study investigates the applicability of knowledge acquired by open-source and API-based LLMs to sophisticated text-based games requiring agent collaboration under imperfect information, comparing their performance to established baselines using other types of agents. We propose a Theory of Mind (ToM) planning technique that allows LLM agents to adapt their strategy against various adversaries using only game rules, current state, and historical context as input. An external tool was incorporated to mitigate the challenge of dynamic and extensive action spaces in this card game. Our results show that although a performance gap exists between current LLMs and state-of-the-art reinforcement learning (RL) models, LLMs demonstrate ToM capabilities in this game setting. 
arXiv:2407.20564 (https://arxiv.org/abs/2407.20564) [pdf, other] cs.CL
CLR-Fact: Evaluating the Complex Logical Reasoning Capability of Large Language Models over Factual Knowledge
Authors: Tianshi Zheng, Jiaxin Bai, Yicheng Wang, Tianqing Fang, Yue Guo, Yauwai Yim, Yangqiu Song
Abstract: While large language models (LLMs) have demonstrated impressive capabilities across various natural language processing tasks by acquiring rich factual knowledge from their broad training data, their ability to synthesize and logically reason with this knowledge in complex ways remains underexplored. In this work, we present a systematic evaluation of state-of-the-art LLMs' complex logical reasoning abilities through a novel benchmark of automatically generated complex reasoning questions over general-domain and biomedical knowledge graphs. Our extensive experiments, employing diverse in-context learning techniques, reveal that LLMs excel at reasoning over general world knowledge but face significant challenges with specialized, domain-specific knowledge. We find that prompting with explicit Chain-of-Thought demonstrations can substantially improve LLM performance on complex logical reasoning tasks with diverse logical operations. Interestingly, our controlled evaluations uncover an asymmetry: LLMs display proficiency at set-union operations but struggle considerably with set intersections, a key building block of logical reasoning. To foster further work, we will publicly release our evaluation benchmark and code.
Submitted 30 July, 2024; originally announced July 2024.
Comments: 9 pages
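The union/intersection asymmetry is easy to state concretely: a complex query's answer set is a set-combination of one-hop answer sets, as in this toy knowledge-graph fragment (facts invented for illustration).

```python
# Toy knowledge-graph fragment: relation "treats" as answer sets per entity.
treats = {
    "aspirin": {"pain", "fever"},
    "ibuprofen": {"pain", "inflammation"},
}

# "What does aspirin OR ibuprofen treat?" (union: reported to be easy)
union = treats["aspirin"] | treats["ibuprofen"]         # pain, fever, inflammation

# "What do BOTH aspirin AND ibuprofen treat?" (intersection: reported failure mode)
intersection = treats["aspirin"] & treats["ibuprofen"]  # pain
```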
arXiv:2407.18962 (https://arxiv.org/abs/2407.18962) [pdf] cs.RO, cs.LG
Autonomous Navigation of Unmanned Vehicle Through Deep Reinforcement Learning
Authors: Letian Xu, Jiabei Liu, Haopeng Zhao, Tianyao Zheng, Tongzhou Jiang, Lipeng Liu
Abstract: This paper explores achieving autonomous navigation of unmanned vehicles through Deep Reinforcement Learning (DRL). The focus is on using the Deep Deterministic Policy Gradient (DDPG) algorithm to address issues in high-dimensional continuous action spaces. The paper details the model of an Ackermann-steering robot and the structure and application of the DDPG algorithm. Experiments were conducted in a simulation environment to verify the feasibility of the improved algorithm. The results demonstrate that the DDPG algorithm outperforms the traditional Deep Q-Network (DQN) and Double Deep Q-Network (DDQN) algorithms in path-planning tasks.
Submitted 18 July, 2024; originally announced July 2024.
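For readers unfamiliar with DDPG, a compact PyTorch sketch of one update step follows; the network sizes and learning rates are invented, and target networks and exploration noise are omitted for brevity, so this is a simplification rather than the paper's configuration.

```python
import torch
import torch.nn as nn

obs_dim, act_dim = 8, 2
actor = nn.Sequential(nn.Linear(obs_dim, 64), nn.ReLU(),
                      nn.Linear(64, act_dim), nn.Tanh())
critic = nn.Sequential(nn.Linear(obs_dim + act_dim, 64), nn.ReLU(),
                       nn.Linear(64, 1))
actor_opt = torch.optim.Adam(actor.parameters(), lr=1e-4)
critic_opt = torch.optim.Adam(critic.parameters(), lr=1e-3)

def ddpg_update(obs, act, rew, next_obs, gamma=0.99):
    # Critic: regress Q(s, a) toward r + gamma * Q(s', pi(s')).
    # Real DDPG computes this target with slow-moving target networks.
    with torch.no_grad():
        target = rew + gamma * critic(torch.cat([next_obs, actor(next_obs)], -1))
    q = critic(torch.cat([obs, act], -1))
    critic_loss = ((q - target) ** 2).mean()
    critic_opt.zero_grad(); critic_loss.backward(); critic_opt.step()
    # Actor: ascend the critic's estimate of Q(s, pi(s)).
    actor_loss = -critic(torch.cat([obs, actor(obs)], -1)).mean()
    actor_opt.zero_grad(); actor_loss.backward(); actor_opt.step()

ddpg_update(torch.randn(32, obs_dim), torch.randn(32, act_dim),
            torch.randn(32, 1), torch.randn(32, obs_dim))
```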
arXiv:2407.17070 (https://arxiv.org/abs/2407.17070) [pdf, other] cs.LG, cs.AI
Curriculum Negative Mining For Temporal Networks
Authors: Ziyue Chen, Tongya Zheng, Mingli Song
Abstract: Temporal networks are effective in capturing the evolving interactions of networks over time, such as social networks and e-commerce networks. In recent years, researchers have primarily concentrated on developing specific model architectures for Temporal Graph Neural Networks (TGNNs) to improve the representation quality of temporal nodes and edges. However, limited attention has been paid to the quality of negative samples during TGNN training. Compared with static networks, temporal networks present two specific challenges for negative sampling: positive sparsity and positive shift. Positive sparsity refers to the presence of a single positive sample amidst numerous negative samples at each timestamp, while positive shift relates to the variation of positive samples across timestamps. To robustly address these challenges, we introduce Curriculum Negative Mining (CurNM), a model-aware curriculum learning framework that adaptively adjusts the difficulty of negative samples. Within this framework, we first establish a dynamically updated negative pool that balances random, historical, and hard negatives to address the challenges posed by positive sparsity. Second, we implement a temporal-aware negative selection module that focuses on learning from the disentangled factors of recently active edges, thus accurately capturing shifting preferences. Extensive experiments on 12 datasets and 3 TGNNs demonstrate that our method outperforms baseline methods by a significant margin. Thorough ablation studies and parameter-sensitivity experiments further verify the usefulness and robustness of our approach. Our code is available at https://github.com/zziyue83/CurNM.
Submitted 24 July, 2024; originally announced July 2024.
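The balanced negative pool can be sketched as a difficulty-adaptive mixture; the mixing ratios below are invented for illustration and are not the schedule used by CurNM.

```python
import random

def sample_negatives(random_pool, historical_pool, hard_pool, k, difficulty):
    """Mix random, historical, and hard negatives for one training step.

    difficulty in [0, 1]: a model-aware curriculum would raise it as
    training progresses, shifting mass from random toward hard negatives.
    """
    n_hard = int(k * difficulty)
    n_hist = int(k * 0.25)                 # invented fixed share
    n_rand = k - n_hard - n_hist
    return (random.sample(hard_pool, min(n_hard, len(hard_pool)))
            + random.sample(historical_pool, min(n_hist, len(historical_pool)))
            + random.sample(random_pool, min(n_rand, len(random_pool))))

negs = sample_negatives(list(range(100)), list(range(100, 150)),
                        list(range(150, 170)), k=20, difficulty=0.3)
```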
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.15389">arXiv:2407.15389</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.15389">pdf</a>, <a href="https://arxiv.org/format/2407.15389">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> Poisoning with A Pill: Circumventing Detection in Federated Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Guo%2C+H">Hanxi Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+H">Hao Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+T">Tao Song</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+T">Tianhang Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Hua%2C+Y">Yang Hua</a>, <a href="/search/cs?searchtype=author&amp;query=Guan%2C+H">Haibing Guan</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+X">Xiangyu Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.15389v1-abstract-short" style="display: inline;"> Without direct access to the client&#39;s data, federated learning (FL) is well-known for its unique strength in data privacy protection among existing distributed machine learning techniques. However, its distributive and iterative nature makes FL inherently vulnerable to various poisoning attacks. To counteract these threats, extensive defenses have been proposed to filter out malicious clients, usi&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.15389v1-abstract-full').style.display = 'inline'; document.getElementById('2407.15389v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.15389v1-abstract-full" style="display: none;"> Without direct access to the client&#39;s data, federated learning (FL) is well-known for its unique strength in data privacy protection among existing distributed machine learning techniques. However, its distributive and iterative nature makes FL inherently vulnerable to various poisoning attacks. To counteract these threats, extensive defenses have been proposed to filter out malicious clients, using various detection metrics. Based on our analysis of existing attacks and defenses, we find that there is a lack of attention to model redundancy. In neural networks, various model parameters contribute differently to the model&#39;s performance. However, existing attacks in FL manipulate all the model update parameters with the same strategy, making them easily detectable by common defenses. Meanwhile, the defenses also tend to analyze the overall statistical features of the entire model updates, leaving room for sophisticated attacks. 
arXiv:2407.15325 (https://arxiv.org/abs/2407.15325) [pdf, other] cs.AI
Odyssey: Empowering Minecraft Agents with Open-World Skills
Authors: Shunyu Liu, Yaoru Li, Kongcheng Zhang, Zhenyu Cui, Wenkai Fang, Yuxuan Zheng, Tongya Zheng, Mingli Song
Abstract: Recent studies have delved into constructing generalist agents for open-world environments like Minecraft. Despite the encouraging results, existing efforts mainly focus on solving basic programmatic tasks, e.g., material collection and tool-crafting following the Minecraft tech tree, treating the ObtainDiamond task as the ultimate goal. This limitation stems from the narrowly defined set of actions available to agents, which requires them to learn effective long-horizon strategies from scratch; consequently, discovering diverse gameplay opportunities in the open world becomes challenging. In this work, we introduce Odyssey, a new framework that empowers Large Language Model (LLM)-based agents with open-world skills to explore the vast Minecraft world. Odyssey comprises three key parts: (1) an interactive agent with an open-world skill library consisting of 40 primitive skills and 183 compositional skills; (2) a fine-tuned LLaMA-3 model trained on a large question-answering dataset with 390k+ instruction entries derived from the Minecraft Wiki; and (3) a new agent capability benchmark comprising a long-term planning task, a dynamic-immediate planning task, and an autonomous exploration task. Extensive experiments demonstrate that the proposed Odyssey framework can effectively evaluate different capabilities of LLM-based agents. All datasets, model weights, and code are publicly available to motivate future research on more advanced autonomous agent solutions.
Submitted 7 October, 2024; v1 submitted 21 July, 2024; originally announced July 2024.
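A skill library mixing primitive and compositional skills can be mocked in a few lines; the skill names below are invented and are not drawn from Odyssey's actual library of 40 primitive and 183 compositional skills.

```python
# Registry of primitive skills; compositional skills are plans over them.
PRIMITIVES = {}

def primitive(fn):
    PRIMITIVES[fn.__name__] = fn
    return fn

@primitive
def collect(agent, item):
    return f"{agent} collects {item}"

@primitive
def craft(agent, item):
    return f"{agent} crafts {item}"

def compositional_skill(steps):
    """A compositional skill is an ordered plan over primitive skills."""
    def run(agent):
        return [PRIMITIVES[name](agent, arg) for name, arg in steps]
    return run

make_torch = compositional_skill([("collect", "log"), ("craft", "stick"),
                                  ("collect", "coal"), ("craft", "torch")])
print(make_torch("agent"))
```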
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.13778">arXiv:2407.13778</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.13778">pdf</a>, <a href="https://arxiv.org/format/2407.13778">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Assessing the Potential of PlanetScope Satellite Imagery to Estimate Particulate Matter Oxidative Potential </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Hough%2C+I">Ian Hough</a>, <a href="/search/cs?searchtype=author&amp;query=Argentier%2C+L">Lo茂c Argentier</a>, <a href="/search/cs?searchtype=author&amp;query=Jiang%2C+Z">Ziyang Jiang</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+T">Tongshu Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Bergin%2C+M">Mike Bergin</a>, <a href="/search/cs?searchtype=author&amp;query=Carlson%2C+D">David Carlson</a>, <a href="/search/cs?searchtype=author&amp;query=Jaffrezo%2C+J">Jean-Luc Jaffrezo</a>, <a href="/search/cs?searchtype=author&amp;query=Chanussot%2C+J">Jocelyn Chanussot</a>, <a href="/search/cs?searchtype=author&amp;query=Uzu%2C+G">Ga毛lle Uzu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.13778v1-abstract-short" style="display: inline;"> Oxidative potential (OP), which measures particulate matter&#39;s (PM) capacity to induce oxidative stress in the lungs, is increasingly recognized as an indicator of PM toxicity. Since OP is not routinely monitored, it can be challenging to estimate exposure and health impacts. Remote sensing data are commonly used to estimate PM mass concentration, but have never been used to estimate OP. In this st&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.13778v1-abstract-full').style.display = 'inline'; document.getElementById('2407.13778v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.13778v1-abstract-full" style="display: none;"> Oxidative potential (OP), which measures particulate matter&#39;s (PM) capacity to induce oxidative stress in the lungs, is increasingly recognized as an indicator of PM toxicity. Since OP is not routinely monitored, it can be challenging to estimate exposure and health impacts. Remote sensing data are commonly used to estimate PM mass concentration, but have never been used to estimate OP. In this study, we evaluate the potential of satellite images to estimate OP as measured by acellular ascorbic acid (OP AA) and dithiothreitol (OP DTT) assays of 24-hour PM10 sampled periodically over five years at three locations around Grenoble, France. We use a deep convolutional neural network to extract features of daily 3 m/pixel PlanetScope satellite images and train a multilayer perceptron to estimate OP at a 1 km spatial resolution based on the image features and common meteorological variables. 
arXiv:2407.09904 (https://arxiv.org/abs/2407.09904) [pdf, other] cs.LG
Learning a Mini-batch Graph Transformer via Two-stage Interaction Augmentation
Authors: Wenda Li, Kaixuan Chen, Shunyu Liu, Tongya Zheng, Wenjie Huang, Mingli Song
Abstract: Mini-batch Graph Transformer (MGT), as an emerging graph learning model, has demonstrated significant advantages in semi-supervised node prediction tasks, with improved computational efficiency and enhanced model robustness.
However, existing methods for processing local information either rely on sampling or on simple aggregation, which respectively result in the loss and the squashing of critical neighbor information. Moreover, the limited number of nodes in each mini-batch restricts the model's capacity to capture the global characteristics of the graph. In this paper, we propose LGMformer, a novel MGT model that employs a two-stage augmented interaction strategy, transitioning from local to global perspectives, to address these bottlenecks. The local interaction augmentation (LIA) stage presents a neighbor-target interaction Transformer (NTIformer) to gain an insightful understanding of the co-interaction patterns between neighbors and the target node, resulting in a locally effective token list that serves as input for the MGT. The global interaction augmentation (GIA) stage then adopts a cross-attention mechanism to incorporate entire-graph prototypes into the target node representation, thereby compensating for the global graph information and ensuring a more comprehensive perception. LGMformer thus enhances node representations under the MGT paradigm. Experimental results on node classification across ten benchmark datasets demonstrate the effectiveness of the proposed method. Our code is available at https://github.com/l-wd/LGMformer.
Submitted 13 July, 2024; originally announced July 2024.
Comments: 8 pages, 4 figures; accepted by ECAI 2024
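The GIA step, cross-attending target-node tokens to a small set of learned whole-graph prototypes, can be sketched directly with a standard attention layer; the dimensions and prototype count below are invented for illustration.

```python
import torch
import torch.nn as nn

d, n_proto = 64, 8
prototypes = nn.Parameter(torch.randn(n_proto, d))   # learned graph summaries
attn = nn.MultiheadAttention(embed_dim=d, num_heads=4, batch_first=True)

target = torch.randn(32, 1, d)                       # 32 target-node tokens
proto = prototypes.unsqueeze(0).expand(32, -1, -1)   # shared across the batch
# Each target token queries the prototypes, pulling in global context.
augmented, _ = attn(query=target, key=proto, value=proto)
```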
arXiv:2407.08942 (https://arxiv.org/abs/2407.08942) [pdf] cs.IR, cs.AI
A Neural Matrix Decomposition Recommender System Model based on the Multimodal Large Language Model
Authors: Ao Xiang, Bingjie Huang, Xinyu Guo, Haowei Yang, Tianyao Zheng
Abstract: Recommendation systems have become an important solution to information-search problems. This article proposes BoNMF, a neural matrix factorization recommendation model based on a multimodal large language model. The model combines BoBERTa's strong capabilities in natural language processing, ViT's in computer vision, and neural matrix decomposition. By capturing the latent characteristics of users and items, and interacting them with a low-dimensional matrix composed of user and item IDs, the neural network outputs recommendation results. Cold-start and ablation experiments show that the BoNMF model performs well on large public datasets and significantly improves recommendation accuracy.
Submitted 11 July, 2024; originally announced July 2024.
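A model in the spirit of BoNMF, fusing user/item ID embeddings with precomputed text and image features before an MLP scorer, might be sketched as follows; all dimensions and the fusion scheme are assumptions based only on the abstract.

```python
import torch
import torch.nn as nn

class TwoTowerNMF(nn.Module):
    """ID embeddings fused with multimodal features, scored by an MLP.

    text_dim/img_dim stand in for BoBERTa/ViT feature widths (assumed).
    """
    def __init__(self, n_users, n_items, d=32, text_dim=768, img_dim=768):
        super().__init__()
        self.user_emb = nn.Embedding(n_users, d)
        self.item_emb = nn.Embedding(n_items, d)
        self.text_proj = nn.Linear(text_dim, d)   # project text features
        self.img_proj = nn.Linear(img_dim, d)     # project image features
        self.mlp = nn.Sequential(nn.Linear(4 * d, 64), nn.ReLU(),
                                 nn.Linear(64, 1))

    def forward(self, u, i, text_feat, img_feat):
        x = torch.cat([self.user_emb(u), self.item_emb(i),
                       self.text_proj(text_feat), self.img_proj(img_feat)], dim=-1)
        return torch.sigmoid(self.mlp(x)).squeeze(-1)   # interaction score

model = TwoTowerNMF(100, 200)
score = model(torch.tensor([3]), torch.tensor([7]),
              torch.randn(1, 768), torch.randn(1, 768))
```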
