Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/>  <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b">  <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\$","\$"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a>  <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div>  <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div>  <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 6,658 results for author: <span class="mathjax">Wang, X</span> </h1> </div> <div class="level-right is-hidden-mobile">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Wang%2C+X">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Wang, X"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Wang%2C+X&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Wang, X"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Wang%2C+X&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Wang%2C+X&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Wang%2C+X&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Wang%2C+X&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Wang%2C+X&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Wang%2C+X&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">…</span></li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.14429">arXiv:2411.14429</a> <span> [<a href="https://arxiv.org/pdf/2411.14429">pdf</a>, <a href="https://arxiv.org/format/2411.14429">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Revisiting the Integration of Convolution and Attention for Vision Backbone </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhu%2C+L">Lei Zhu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xinjiang Wang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+W">Wayne Zhang</a>, <a href="/search/cs?searchtype=author&query=Lau%2C+R+W+H">Rynson W. H. Lau</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.14429v1-abstract-short" style="display: inline;"> Convolutions (Convs) and multi-head self-attentions (MHSAs) are typically considered alternatives to each other for building vision backbones. Although some works try to integrate both, they apply the two operators simultaneously at the finest pixel granularity. With Convs responsible for per-pixel feature extraction already, the question is whether we still need to include the heavy MHSAs at such… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14429v1-abstract-full').style.display = 'inline'; document.getElementById('2411.14429v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.14429v1-abstract-full" style="display: none;"> Convolutions (Convs) and multi-head self-attentions (MHSAs) are typically considered alternatives to each other for building vision backbones. Although some works try to integrate both, they apply the two operators simultaneously at the finest pixel granularity. With Convs responsible for per-pixel feature extraction already, the question is whether we still need to include the heavy MHSAs at such a fine-grained level. In fact, this is the root cause of the scalability issue w.r.t. the input resolution for vision transformers. To address this important problem, we propose in this work to use MSHAs and Convs in parallel \textbf{at different granularity levels} instead. Specifically, in each layer, we use two different ways to represent an image: a fine-grained regular grid and a coarse-grained set of semantic slots. We apply different operations to these two representations: Convs to the grid for local features, and MHSAs to the slots for global features. A pair of fully differentiable soft clustering and dispatching modules is introduced to bridge the grid and set representations, thus enabling local-global fusion. Through extensive experiments on various vision tasks, we empirically verify the potential of the proposed integration scheme, named \textit{GLMix}: by offloading the burden of fine-grained features to light-weight Convs, it is sufficient to use MHSAs in a few (e.g., 64) semantic slots to match the performance of recent state-of-the-art backbones, while being more efficient. Our visualization results also demonstrate that the soft clustering module produces a meaningful semantic grouping effect with only IN1k classification supervision, which may induce better interpretability and inspire new weakly-supervised semantic segmentation approaches. Code will be available at \url{https://github.com/rayleizhu/GLMix}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14429v1-abstract-full').style.display = 'none'; document.getElementById('2411.14429v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NeurIPS 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.14292">arXiv:2411.14292</a> <span> [<a href="https://arxiv.org/pdf/2411.14292">pdf</a>, <a href="https://arxiv.org/format/2411.14292">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> Hypothesis testing of symmetry in quantum dynamics </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yu-Ao Chen</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+C">Chenghong Zhu</a>, <a href="/search/cs?searchtype=author&query=He%2C+K">Keming He</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yingjian Liu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xin Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.14292v1-abstract-short" style="display: inline;"> Symmetry plays a crucial role in quantum physics, dictating the behavior and dynamics of physical systems. In this paper, We develop a hypothesis-testing framework for quantum dynamics symmetry using a limited number of queries to the unknown unitary operation and establish the quantum max-relative entropy lower bound for the type-II error. We construct optimal ancilla-free protocols that achieve… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14292v1-abstract-full').style.display = 'inline'; document.getElementById('2411.14292v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.14292v1-abstract-full" style="display: none;"> Symmetry plays a crucial role in quantum physics, dictating the behavior and dynamics of physical systems. In this paper, We develop a hypothesis-testing framework for quantum dynamics symmetry using a limited number of queries to the unknown unitary operation and establish the quantum max-relative entropy lower bound for the type-II error. We construct optimal ancilla-free protocols that achieve optimal type-II error probability for testing time-reversal symmetry (T-symmetry) and diagonal symmetry (Z-symmetry) with limited queries. Contrasting with the advantages of indefinite causal order strategies in various quantum information processing tasks, we show that parallel, adaptive, and indefinite causal order strategies have equal power for our tasks. We establish optimal protocols for T-symmetry testing and Z-symmetry testing for 6 and 5 queries, respectively, from which we infer that the type-II error exhibits a decay rate of $\mathcal{O}(m^{-2})$ with respect to the number of queries $m$. This represents a significant improvement over the basic repetition protocols without using global entanglement, where the error decays at a slower rate of $\mathcal{O}(m^{-1})$. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14292v1-abstract-full').style.display = 'none'; document.getElementById('2411.14292v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.14072">arXiv:2411.14072</a> <span> [<a href="https://arxiv.org/pdf/2411.14072">pdf</a>, <a href="https://arxiv.org/format/2411.14072">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Programming Languages">cs.PL</span> </div> </div> <p class="title is-5 mathjax"> The Master-Slave Encoder Model for Improving Patent Text Summarization: A New Approach to Combining Specifications and Claims </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhou%2C+S">Shu Zhou</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xin Wang</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Z">Zhengda Zhou</a>, <a href="/search/cs?searchtype=author&query=Yi%2C+H">Haohan Yi</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+X">Xuhui Zheng</a>, <a href="/search/cs?searchtype=author&query=Wan%2C+H">Hao Wan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.14072v1-abstract-short" style="display: inline;"> In order to solve the problem of insufficient generation quality caused by traditional patent text abstract generation models only originating from patent specifications, the problem of new terminology OOV caused by rapid patent updates, and the problem of information redundancy caused by insufficient consideration of the high professionalism, accuracy, and uniqueness of patent texts, we proposes… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14072v1-abstract-full').style.display = 'inline'; document.getElementById('2411.14072v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.14072v1-abstract-full" style="display: none;"> In order to solve the problem of insufficient generation quality caused by traditional patent text abstract generation models only originating from patent specifications, the problem of new terminology OOV caused by rapid patent updates, and the problem of information redundancy caused by insufficient consideration of the high professionalism, accuracy, and uniqueness of patent texts, we proposes a patent text abstract generation model (MSEA) based on a master-slave encoder architecture; Firstly, the MSEA model designs a master-slave encoder, which combines the instructions in the patent text with the claims as input, and fully explores the characteristics and details between the two through the master-slave encoder; Then, the model enhances the consideration of new technical terms in the input sequence based on the pointer network, and further enhances the correlation with the input text by re weighing the "remembered" and "for-gotten" parts of the input sequence from the encoder; Finally, an enhanced repetition suppression mechanism for patent text was introduced to ensure accurate and non redundant abstracts generated. On a publicly available patent text dataset, compared to the state-of-the-art model, Improved Multi-Head Attention Mechanism (IMHAM), the MSEA model achieves an improvement of 0.006, 0.005, and 0.005 in Rouge-1, Rouge-2, and Rouge-L scores, respectively. MSEA leverages the characteristics of patent texts to effectively enhance the quality of patent text generation, demonstrating its advancement and effectiveness in the experiments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14072v1-abstract-full').style.display = 'none'; document.getElementById('2411.14072v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">25pages, 1 figure</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.13826">arXiv:2411.13826</a> <span> [<a href="https://arxiv.org/pdf/2411.13826">pdf</a>, <a href="https://arxiv.org/format/2411.13826">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Interactive and Expressive Code-Augmented Planning with Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+A+Z">Anthony Z. Liu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xinhe Wang</a>, <a href="/search/cs?searchtype=author&query=Sansom%2C+J">Jacob Sansom</a>, <a href="/search/cs?searchtype=author&query=Fu%2C+Y">Yao Fu</a>, <a href="/search/cs?searchtype=author&query=Choi%2C+J">Jongwook Choi</a>, <a href="/search/cs?searchtype=author&query=Sohn%2C+S">Sungryull Sohn</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+J">Jaekyeom Kim</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+H">Honglak Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.13826v1-abstract-short" style="display: inline;"> Large Language Models (LLMs) demonstrate strong abilities in common-sense reasoning and interactive decision-making, but often struggle with complex, long-horizon planning tasks. Recent techniques have sought to structure LLM outputs using control flow and other code-adjacent techniques to improve planning performance. These techniques include using variables (to track important information) and f… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13826v1-abstract-full').style.display = 'inline'; document.getElementById('2411.13826v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.13826v1-abstract-full" style="display: none;"> Large Language Models (LLMs) demonstrate strong abilities in common-sense reasoning and interactive decision-making, but often struggle with complex, long-horizon planning tasks. Recent techniques have sought to structure LLM outputs using control flow and other code-adjacent techniques to improve planning performance. These techniques include using variables (to track important information) and functions (to divide complex tasks into smaller re-usable sub-tasks). However, purely code-based approaches can be error-prone and insufficient for handling ambiguous or unstructured data. To address these challenges, we propose REPL-Plan, an LLM planning approach that is fully code-expressive (it can utilize all the benefits of code) while also being dynamic (it can flexibly adapt from errors and use the LLM for fuzzy situations). In REPL-Plan, an LLM solves tasks by interacting with a Read-Eval-Print Loop (REPL), which iteratively executes and evaluates code, similar to language shells or interactive code notebooks, allowing the model to flexibly correct errors and handle tasks dynamically. We demonstrate that REPL-Plan achieves strong results across various planning domains compared to previous methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13826v1-abstract-full').style.display = 'none'; document.getElementById('2411.13826v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.13504">arXiv:2411.13504</a> <span> [<a href="https://arxiv.org/pdf/2411.13504">pdf</a>, <a href="https://arxiv.org/format/2411.13504">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Disentangling Memory and Reasoning Ability in Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jin%2C+M">Mingyu Jin</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+W">Weidi Luo</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+S">Sitao Cheng</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xinyi Wang</a>, <a href="/search/cs?searchtype=author&query=Hua%2C+W">Wenyue Hua</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+R">Ruixiang Tang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+W+Y">William Yang Wang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yongfeng Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.13504v2-abstract-short" style="display: inline;"> Large Language Models (LLMs) have demonstrated strong performance in handling complex tasks requiring both extensive knowledge and reasoning abilities. However, the existing LLM inference pipeline operates as an opaque process without explicit separation between knowledge retrieval and reasoning steps, making the model's decision-making process unclear and disorganized. This ambiguity can lead to… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13504v2-abstract-full').style.display = 'inline'; document.getElementById('2411.13504v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.13504v2-abstract-full" style="display: none;"> Large Language Models (LLMs) have demonstrated strong performance in handling complex tasks requiring both extensive knowledge and reasoning abilities. However, the existing LLM inference pipeline operates as an opaque process without explicit separation between knowledge retrieval and reasoning steps, making the model's decision-making process unclear and disorganized. This ambiguity can lead to issues such as hallucinations and knowledge forgetting, which significantly impact the reliability of LLMs in high-stakes domains. In this paper, we propose a new inference paradigm that decomposes the complex inference process into two distinct and clear actions: (1) memory recall: which retrieves relevant knowledge, and (2) reasoning: which performs logical steps based on the recalled knowledge. To facilitate this decomposition, we introduce two special tokens memory and reason, guiding the model to distinguish between steps that require knowledge retrieval and those that involve reasoning. Our experiment results show that this decomposition not only improves model performance but also enhances the interpretability of the inference process, enabling users to identify sources of error and refine model responses effectively. The code is available at https://github.com/MingyuJ666/Disentangling-Memory-and-Reasoning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13504v2-abstract-full').style.display = 'none'; document.getElementById('2411.13504v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.13136">arXiv:2411.13136</a> <span> [<a href="https://arxiv.org/pdf/2411.13136">pdf</a>, <a href="https://arxiv.org/format/2411.13136">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> TAPT: Test-Time Adversarial Prompt Tuning for Robust Inference in Vision-Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xin Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Kai Chen</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jiaming Zhang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+J">Jingjing Chen</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+X">Xingjun Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.13136v1-abstract-short" style="display: inline;"> Large pre-trained Vision-Language Models (VLMs) such as CLIP have demonstrated excellent zero-shot generalizability across various downstream tasks. However, recent studies have shown that the inference performance of CLIP can be greatly degraded by small adversarial perturbations, especially its visual modality, posing significant safety threats. To mitigate this vulnerability, in this paper, we… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13136v1-abstract-full').style.display = 'inline'; document.getElementById('2411.13136v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.13136v1-abstract-full" style="display: none;"> Large pre-trained Vision-Language Models (VLMs) such as CLIP have demonstrated excellent zero-shot generalizability across various downstream tasks. However, recent studies have shown that the inference performance of CLIP can be greatly degraded by small adversarial perturbations, especially its visual modality, posing significant safety threats. To mitigate this vulnerability, in this paper, we propose a novel defense method called Test-Time Adversarial Prompt Tuning (TAPT) to enhance the inference robustness of CLIP against visual adversarial attacks. TAPT is a test-time defense method that learns defensive bimodal (textual and visual) prompts to robustify the inference process of CLIP. Specifically, it is an unsupervised method that optimizes the defensive prompts for each test sample by minimizing a multi-view entropy and aligning adversarial-clean distributions. We evaluate the effectiveness of TAPT on 11 benchmark datasets, including ImageNet and 10 other zero-shot datasets, demonstrating that it enhances the zero-shot adversarial robustness of the original CLIP by at least 48.9% against AutoAttack (AA), while largely maintaining performance on clean examples. Moreover, TAPT outperforms existing adversarial prompt tuning methods across various backbones, achieving an average robustness improvement of at least 36.6%. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13136v1-abstract-full').style.display = 'none'; document.getElementById('2411.13136v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.12876">arXiv:2411.12876</a> <span> [<a href="https://arxiv.org/pdf/2411.12876">pdf</a>, <a href="https://arxiv.org/format/2411.12876">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Puppet-CNN: Input-Adaptive Convolutional Neural Networks with Model Compression using Ordinary Differential Equation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xing%2C+Y">Yucheng Xing</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xin Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.12876v1-abstract-short" style="display: inline;"> Convolutional Neural Network (CNN) has been applied to more and more scenarios due to its excellent performance in many machine learning tasks, especially with deep and complex structures. However, as the network goes deeper, more parameters need to be stored and optimized. Besides, almost all common CNN models adopt "train-and-use" strategy where the structure is pre-defined and the kernel parame… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12876v1-abstract-full').style.display = 'inline'; document.getElementById('2411.12876v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.12876v1-abstract-full" style="display: none;"> Convolutional Neural Network (CNN) has been applied to more and more scenarios due to its excellent performance in many machine learning tasks, especially with deep and complex structures. However, as the network goes deeper, more parameters need to be stored and optimized. Besides, almost all common CNN models adopt "train-and-use" strategy where the structure is pre-defined and the kernel parameters are fixed after the training with the same structure and set of parameters used for all data without considering the content complexity. In this paper, we propose a new CNN framework, named as $\textit{Puppet-CNN}$, which contains two modules: a $\textit{puppet module}$ and a $\textit{puppeteer module}$. The puppet module is a CNN model used to actually process the input data just like other works, but its depth and kernels are generated by the puppeteer module (realized with Ordinary Differential Equation (ODE)) based on the input complexity each time. By recurrently generating kernel parameters in the puppet module, we can take advantage of the dependence among kernels of different convolutional layers to significantly reduce the size of CNN model by only storing and training the parameters of the much smaller puppeteer ODE module. Through experiments on several datasets, our method has proven to be superior than the traditional CNNs on both performance and efficiency. The model size can be reduced more than 10 times. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12876v1-abstract-full').style.display = 'none'; document.getElementById('2411.12876v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.12161">arXiv:2411.12161</a> <span> [<a href="https://arxiv.org/pdf/2411.12161">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> Adaptive Cache Management for Complex Storage Systems Using CNN-LSTM-Based Spatiotemporal Prediction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xiaoye Wang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xuan Li</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+L">Linji Wang</a>, <a href="/search/cs?searchtype=author&query=Ruan%2C+T">Tingyi Ruan</a>, <a href="/search/cs?searchtype=author&query=Li%2C+P">Pochun Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.12161v1-abstract-short" style="display: inline;"> This paper proposes an intelligent cache management strategy based on CNN-LSTM to improve the performance and cache hit rate of storage systems. Through comparative experiments with traditional algorithms (such as LRU and LFU) and other deep learning models (such as RNN, GRU-RNN and LSTM), the results show that the CNN-LSTM model has significant advantages in cache demand prediction. The MSE and M… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12161v1-abstract-full').style.display = 'inline'; document.getElementById('2411.12161v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.12161v1-abstract-full" style="display: none;"> This paper proposes an intelligent cache management strategy based on CNN-LSTM to improve the performance and cache hit rate of storage systems. Through comparative experiments with traditional algorithms (such as LRU and LFU) and other deep learning models (such as RNN, GRU-RNN and LSTM), the results show that the CNN-LSTM model has significant advantages in cache demand prediction. The MSE and MAE values of this model are significantly reduced, proving its effectiveness under complex data access patterns. This study not only verifies the potential of deep learning technology in storage system optimization, but also provides direction and reference for further optimizing and improving cache management strategies. This intelligent cache management strategy performs well in complex storage environments. By combining the spatial feature extraction capabilities of convolutional neural networks and the time series modeling capabilities of long short-term memory networks, the CNN-LSTM model can more accurately predict cache needs, thereby Dynamically optimize cache allocation to improve system response speed and resource utilization. This research provides theoretical support and practical reference for cache optimization under large-scale data access modes, and is of great significance to improving the performance of future storage systems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12161v1-abstract-full').style.display = 'none'; document.getElementById('2411.12161v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.11915">arXiv:2411.11915</a> <span> [<a href="https://arxiv.org/pdf/2411.11915">pdf</a>, <a href="https://arxiv.org/format/2411.11915">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Phenome-wide causal proteomics enhance systemic lupus erythematosus flare prediction: A study in Asian populations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+L">Liying Chen</a>, <a href="/search/cs?searchtype=author&query=Deng%2C+O">Ou Deng</a>, <a href="/search/cs?searchtype=author&query=Fang%2C+T">Ting Fang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+M">Mei Chen</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xvfeng Zhang</a>, <a href="/search/cs?searchtype=author&query=Cong%2C+R">Ruichen Cong</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+D">Dingqi Lu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Runrun Zhang</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+Q">Qun Jin</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xinchang Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.11915v1-abstract-short" style="display: inline;"> Objective: Systemic lupus erythematosus (SLE) is a complex autoimmune disease characterized by unpredictable flares. This study aimed to develop a novel proteomics-based risk prediction model specifically for Asian SLE populations to enhance personalized disease management and early intervention. Methods: A longitudinal cohort study was conducted over 48 weeks, including 139 SLE patients monitored… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11915v1-abstract-full').style.display = 'inline'; document.getElementById('2411.11915v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.11915v1-abstract-full" style="display: none;"> Objective: Systemic lupus erythematosus (SLE) is a complex autoimmune disease characterized by unpredictable flares. This study aimed to develop a novel proteomics-based risk prediction model specifically for Asian SLE populations to enhance personalized disease management and early intervention. Methods: A longitudinal cohort study was conducted over 48 weeks, including 139 SLE patients monitored every 12 weeks. Patients were classified into flare (n = 53) and non-flare (n = 86) groups. Baseline plasma samples underwent data-independent acquisition (DIA) proteomics analysis, and phenome-wide Mendelian randomization (PheWAS) was performed to evaluate causal relationships between proteins and clinical predictors. Logistic regression (LR) and random forest (RF) models were used to integrate proteomic and clinical data for flare risk prediction. Results: Five proteins (SAA1, B4GALT5, GIT2, NAA15, and RPIA) were significantly associated with SLE Disease Activity Index-2K (SLEDAI-2K) scores and 1-year flare risk, implicating key pathways such as B-cell receptor signaling and platelet degranulation. SAA1 demonstrated causal effects on flare-related clinical markers, including hemoglobin and red blood cell counts. A combined model integrating clinical and proteomic data achieved the highest predictive accuracy (AUC = 0.769), surpassing individual models. SAA1 was highlighted as a priority biomarker for rapid flare discrimination. Conclusion: The integration of proteomic and clinical data significantly improves flare prediction in Asian SLE patients. The identification of key proteins and their causal relationships with flare-related clinical markers provides valuable insights for proactive SLE management and personalized therapeutic approaches. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11915v1-abstract-full').style.display = 'none'; document.getElementById('2411.11915v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.11683">arXiv:2411.11683</a> <span> [<a href="https://arxiv.org/pdf/2411.11683">pdf</a>, <a href="https://arxiv.org/format/2411.11683">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> TrojanRobot: Backdoor Attacks Against Robotic Manipulation in the Physical World </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xianlong Wang</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+H">Hewen Pan</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Hangtao Zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+M">Minghui Li</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+S">Shengshan Hu</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Z">Ziqi Zhou</a>, <a href="/search/cs?searchtype=author&query=Xue%2C+L">Lulu Xue</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+P">Peijin Guo</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yichen Wang</a>, <a href="/search/cs?searchtype=author&query=Wan%2C+W">Wei Wan</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+A">Aishan Liu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L+Y">Leo Yu Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.11683v1-abstract-short" style="display: inline;"> Robotic manipulation refers to the autonomous handling and interaction of robots with objects using advanced techniques in robotics and artificial intelligence. The advent of powerful tools such as large language models (LLMs) and large vision-language models (LVLMs) has significantly enhanced the capabilities of these robots in environmental perception and decision-making. However, the introducti… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11683v1-abstract-full').style.display = 'inline'; document.getElementById('2411.11683v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.11683v1-abstract-full" style="display: none;"> Robotic manipulation refers to the autonomous handling and interaction of robots with objects using advanced techniques in robotics and artificial intelligence. The advent of powerful tools such as large language models (LLMs) and large vision-language models (LVLMs) has significantly enhanced the capabilities of these robots in environmental perception and decision-making. However, the introduction of these intelligent agents has led to security threats such as jailbreak attacks and adversarial attacks. In this research, we take a further step by proposing a backdoor attack specifically targeting robotic manipulation and, for the first time, implementing backdoor attack in the physical world. By embedding a backdoor visual language model into the visual perception module within the robotic system, we successfully mislead the robotic arm's operation in the physical world, given the presence of common items as triggers. Experimental evaluations in the physical world demonstrate the effectiveness of the proposed backdoor attack. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11683v1-abstract-full').style.display = 'none'; document.getElementById('2411.11683v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Initial version with preliminary results. We welcome any feedback or suggestions</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.11487">arXiv:2411.11487</a> <span> [<a href="https://arxiv.org/pdf/2411.11487">pdf</a>, <a href="https://arxiv.org/format/2411.11487">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Look a Group at Once: Multi-Slide Modeling for Survival Prediction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+X">Xinyang Li</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yi Zhang</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+Y">Yi Xie</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+J">Jianfei Yang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xi Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+H">Hao Chen</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Haixian Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.11487v1-abstract-short" style="display: inline;"> Survival prediction is a critical task in pathology. In clinical practice, pathologists often examine multiple cases, leveraging a broader spectrum of cancer phenotypes to enhance pathological assessment. Despite significant advancements in deep learning, current solutions typically model each slide as a sample, struggling to effectively capture comparable and slide-agnostic pathological features.… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11487v1-abstract-full').style.display = 'inline'; document.getElementById('2411.11487v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.11487v1-abstract-full" style="display: none;"> Survival prediction is a critical task in pathology. In clinical practice, pathologists often examine multiple cases, leveraging a broader spectrum of cancer phenotypes to enhance pathological assessment. Despite significant advancements in deep learning, current solutions typically model each slide as a sample, struggling to effectively capture comparable and slide-agnostic pathological features. In this paper, we introduce GroupMIL, a novel framework inspired by the clinical practice of collective analysis, which models multiple slides as a single sample and organizes groups of patches and slides sequentially to capture cross-slide prognostic features. We also present GPAMamba, a model designed to facilitate intra- and inter-slide feature interactions, effectively capturing local micro-environmental characteristics within slide-level graphs while uncovering essential prognostic patterns across an extended patch sequence within the group framework. Furthermore, we develop a dual-head predictor that delivers comprehensive survival risk and probability assessments for each patient. Extensive empirical evaluations demonstrate that our model significantly outperforms state-of-the-art approaches across five datasets from The Cancer Genome Atlas. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11487v1-abstract-full').style.display = 'none'; document.getElementById('2411.11487v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.11033">arXiv:2411.11033</a> <span> [<a href="https://arxiv.org/pdf/2411.11033">pdf</a>, <a href="https://arxiv.org/format/2411.11033">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> REACCEPT: Automated Co-evolution of Production and Test Code Based on Dynamic Validation and Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chi%2C+J">Jianlei Chi</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xiaotian Wang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yuhan Huang</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+L">Lechen Yu</a>, <a href="/search/cs?searchtype=author&query=Cui%2C+D">Di Cui</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+J">Jianguo Sun</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+J">Jun Sun</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.11033v1-abstract-short" style="display: inline;"> Synchronizing production and test code, known as PT co-evolution, is critical for software quality in the software development lifecycle. Existing methods for automatic PT co-evolution either utilize predefined heuristic rules or rely on simple application of machine learning techniques. Due to the limitations of underlying techniques, existing methods either only partially automate PT co-evolutio… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11033v1-abstract-full').style.display = 'inline'; document.getElementById('2411.11033v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.11033v1-abstract-full" style="display: none;"> Synchronizing production and test code, known as PT co-evolution, is critical for software quality in the software development lifecycle. Existing methods for automatic PT co-evolution either utilize predefined heuristic rules or rely on simple application of machine learning techniques. Due to the limitations of underlying techniques, existing methods either only partially automate PT co-evolution (e.g., only automate obsolete test code identification) or result in low accuracy. In this paper, we propose REACCEPT, a novel approach that leverages large language models and dynamic validation to fully automate PT co-evolution (i.e., capable of both identifying and updating obsolete test cases). REACCEPT relies on experience-based prompt template generation, dynamic validation, and retrieval-augmented generation techniques to accomplish automated PT co-evolution. To evaluate REACCEPT's effectiveness, we extensive experiments with a dataset of 537 Java projects and compared REACCEPT's performance with several state-of-the-art methods. Results show that REACCEPT achieved an update accuracy of 60.16% on correctly identified obsolete test code, surpassing the state-of-the-art technique CEPROT by 90%. This confirms that REACCEPT can effectively assist developers in maintaining test code, improving overall software quality and reducing maintenance effort. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11033v1-abstract-full').style.display = 'none'; document.getElementById('2411.11033v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">21 pages, 8 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.10961">arXiv:2411.10961</a> <span> [<a href="https://arxiv.org/pdf/2411.10961">pdf</a>, <a href="https://arxiv.org/format/2411.10961">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Map-Free Trajectory Prediction with Map Distillation and Hierarchical Encoding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xiaodong Liu</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+Y">Yucheng Xing</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xin Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.10961v1-abstract-short" style="display: inline;"> Reliable motion forecasting of surrounding agents is essential for ensuring the safe operation of autonomous vehicles. Many existing trajectory prediction methods rely heavily on high-definition (HD) maps as strong driving priors. However, the availability and accuracy of these priors are not guaranteed due to substantial costs to build, localization errors of vehicles, or ongoing road constructio… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10961v1-abstract-full').style.display = 'inline'; document.getElementById('2411.10961v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.10961v1-abstract-full" style="display: none;"> Reliable motion forecasting of surrounding agents is essential for ensuring the safe operation of autonomous vehicles. Many existing trajectory prediction methods rely heavily on high-definition (HD) maps as strong driving priors. However, the availability and accuracy of these priors are not guaranteed due to substantial costs to build, localization errors of vehicles, or ongoing road constructions. In this paper, we introduce MFTP, a Map-Free Trajectory Prediction method that offers several advantages. First, it eliminates the need for HD maps during inference while still benefiting from map priors during training via knowledge distillation. Second, we present a novel hierarchical encoder that effectively extracts spatial-temporal agent features and aggregates them into multiple trajectory queries. Additionally, we introduce an iterative decoder that sequentially decodes trajectory queries to generate the final predictions. Extensive experiments show that our approach achieves state-of-the-art performance on the Argoverse dataset under the map-free setting. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10961v1-abstract-full').style.display = 'none'; document.getElementById('2411.10961v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.10682">arXiv:2411.10682</a> <span> [<a href="https://arxiv.org/pdf/2411.10682">pdf</a>, <a href="https://arxiv.org/format/2411.10682">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Underwater Image Enhancement with Cascaded Contrastive Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yi Liu</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Q">Qiuping Jiang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xinyi Wang</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+T">Ting Luo</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+J">Jingchun Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.10682v1-abstract-short" style="display: inline;"> Underwater image enhancement (UIE) is a highly challenging task due to the complexity of underwater environment and the diversity of underwater image degradation. Due to the application of deep learning, current UIE methods have made significant progress. Most of the existing deep learning-based UIE methods follow a single-stage network which cannot effectively address the diverse degradations sim… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10682v1-abstract-full').style.display = 'inline'; document.getElementById('2411.10682v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.10682v1-abstract-full" style="display: none;"> Underwater image enhancement (UIE) is a highly challenging task due to the complexity of underwater environment and the diversity of underwater image degradation. Due to the application of deep learning, current UIE methods have made significant progress. Most of the existing deep learning-based UIE methods follow a single-stage network which cannot effectively address the diverse degradations simultaneously. In this paper, we propose to address this issue by designing a two-stage deep learning framework and taking advantage of cascaded contrastive learning to guide the network training of each stage. The proposed method is called CCL-Net in short. Specifically, the proposed CCL-Net involves two cascaded stages, i.e., a color correction stage tailored to the color deviation issue and a haze removal stage tailored to improve the visibility and contrast of underwater images. To guarantee the underwater image can be progressively enhanced, we also apply contrastive loss as an additional constraint to guide the training of each stage. In the first stage, the raw underwater images are used as negative samples for building the first contrastive loss, ensuring the enhanced results of the first color correction stage are better than the original inputs. While in the second stage, the enhanced results rather than the raw underwater images of the first color correction stage are used as the negative samples for building the second contrastive loss, thus ensuring the final enhanced results of the second haze removal stage are better than the intermediate color corrected results. Extensive experiments on multiple benchmark datasets demonstrate that our CCL-Net can achieve superior performance compared to many state-of-the-art methods. The source code of CCL-Net will be released at https://github.com/lewis081/CCL-Net. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10682v1-abstract-full').style.display = 'none'; document.getElementById('2411.10682v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by IEEE Transacitons on MultiMedia</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.10629">arXiv:2411.10629</a> <span> [<a href="https://arxiv.org/pdf/2411.10629">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Leveraging large language models for efficient representation learning for entity resolution </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xu%2C+X">Xiaowei Xu</a>, <a href="/search/cs?searchtype=author&query=Foua%2C+B+T">Bi T. Foua</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xingqiao Wang</a>, <a href="/search/cs?searchtype=author&query=Gunasekaran%2C+V">Vivek Gunasekaran</a>, <a href="/search/cs?searchtype=author&query=Talburt%2C+J+R">John R. Talburt</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.10629v1-abstract-short" style="display: inline;"> In this paper, the authors propose TriBERTa, a supervised entity resolution system that utilizes a pre-trained large language model and a triplet loss function to learn representations for entity matching. The system consists of two steps: first, name entity records are fed into a Sentence Bidirectional Encoder Representations from Transformers (SBERT) model to generate vector representations, whi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10629v1-abstract-full').style.display = 'inline'; document.getElementById('2411.10629v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.10629v1-abstract-full" style="display: none;"> In this paper, the authors propose TriBERTa, a supervised entity resolution system that utilizes a pre-trained large language model and a triplet loss function to learn representations for entity matching. The system consists of two steps: first, name entity records are fed into a Sentence Bidirectional Encoder Representations from Transformers (SBERT) model to generate vector representations, which are then fine-tuned using contrastive learning based on a triplet loss function. Fine-tuned representations are used as input for entity matching tasks, and the results show that the proposed approach outperforms state-of-the-art representations, including SBERT without fine-tuning and conventional Term Frequency-Inverse Document Frequency (TF-IDF), by a margin of 3 - 19%. Additionally, the representations generated by TriBERTa demonstrated increased robustness, maintaining consistently higher performance across a range of datasets. The authors also discussed the importance of entity resolution in today's data-driven landscape and the challenges that arise when identifying and reconciling duplicate data across different sources. They also described the ER process, which involves several crucial steps, including blocking, entity matching, and clustering. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10629v1-abstract-full').style.display = 'none'; document.getElementById('2411.10629v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">22 pages and 12 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.10541">arXiv:2411.10541</a> <span> [<a href="https://arxiv.org/pdf/2411.10541">pdf</a>, <a href="https://arxiv.org/format/2411.10541">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Does Prompt Formatting Have Any Impact on LLM Performance? </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=He%2C+J">Jia He</a>, <a href="/search/cs?searchtype=author&query=Rungta%2C+M">Mukund Rungta</a>, <a href="/search/cs?searchtype=author&query=Koleczek%2C+D">David Koleczek</a>, <a href="/search/cs?searchtype=author&query=Sekhon%2C+A">Arshdeep Sekhon</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+F+X">Franklin X Wang</a>, <a href="/search/cs?searchtype=author&query=Hasan%2C+S">Sadid Hasan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.10541v1-abstract-short" style="display: inline;"> In the realm of Large Language Models (LLMs), prompt optimization is crucial for model performance. Although previous research has explored aspects like rephrasing prompt contexts, using various prompting techniques (like in-context learning and chain-of-thought), and ordering few-shot examples, our understanding of LLM sensitivity to prompt templates remains limited. Therefore, this paper examine… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10541v1-abstract-full').style.display = 'inline'; document.getElementById('2411.10541v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.10541v1-abstract-full" style="display: none;"> In the realm of Large Language Models (LLMs), prompt optimization is crucial for model performance. Although previous research has explored aspects like rephrasing prompt contexts, using various prompting techniques (like in-context learning and chain-of-thought), and ordering few-shot examples, our understanding of LLM sensitivity to prompt templates remains limited. Therefore, this paper examines the impact of different prompt templates on LLM performance. We formatted the same contexts into various human-readable templates, including plain text, Markdown, JSON, and YAML, and evaluated their impact across tasks like natural language reasoning, code generation, and translation using OpenAI's GPT models. Experiments show that GPT-3.5-turbo's performance varies by up to 40\% in a code translation task depending on the prompt template, while larger models like GPT-4 are more robust to these variations. Our analysis highlights the need to reconsider the use of fixed prompt templates, as different formats can significantly affect model performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10541v1-abstract-full').style.display = 'none'; document.getElementById('2411.10541v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Submitted to NAACL 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.10291">arXiv:2411.10291</a> <span> [<a href="https://arxiv.org/pdf/2411.10291">pdf</a>, <a href="https://arxiv.org/format/2411.10291">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Moving Forward: A Review of Autonomous Driving Software and Hardware Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xu Wang</a>, <a href="/search/cs?searchtype=author&query=Maleki%2C+M+A">Mohammad Ali Maleki</a>, <a href="/search/cs?searchtype=author&query=Azhar%2C+M+W">Muhammad Waqar Azhar</a>, <a href="/search/cs?searchtype=author&query=Trancoso%2C+P">Pedro Trancoso</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.10291v1-abstract-short" style="display: inline;"> With their potential to significantly reduce traffic accidents, enhance road safety, optimize traffic flow, and decrease congestion, autonomous driving systems are a major focus of research and development in recent years. Beyond these immediate benefits, they offer long-term advantages in promoting sustainable transportation by reducing emissions and fuel consumption. Achieving a high level of au… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10291v1-abstract-full').style.display = 'inline'; document.getElementById('2411.10291v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.10291v1-abstract-full" style="display: none;"> With their potential to significantly reduce traffic accidents, enhance road safety, optimize traffic flow, and decrease congestion, autonomous driving systems are a major focus of research and development in recent years. Beyond these immediate benefits, they offer long-term advantages in promoting sustainable transportation by reducing emissions and fuel consumption. Achieving a high level of autonomy across diverse conditions requires a comprehensive understanding of the environment. This is accomplished by processing data from sensors such as cameras, radars, and LiDARs through a software stack that relies heavily on machine learning algorithms. These ML models demand significant computational resources and involve large-scale data movement, presenting challenges for hardware to execute them efficiently and at high speed. In this survey, we first outline and highlight the key components of self-driving systems, covering input sensors, commonly used datasets, simulation platforms, and the software architecture. We then explore the underlying hardware platforms that support the execution of these software systems. By presenting a comprehensive view of autonomous driving systems and their increasing demands, particularly for higher levels of autonomy, we analyze the performance and efficiency of scaled-up off-the-shelf GPU/CPU-based systems, emphasizing the challenges within the computational components. Through examples showcasing the diverse computational and memory requirements in the software stack, we demonstrate how more specialized hardware and processing closer to memory can enable more efficient execution with lower latency. Finally, based on current trends and future demands, we conclude by speculating what a future hardware platform for autonomous driving might look like. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10291v1-abstract-full').style.display = 'none'; document.getElementById('2411.10291v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.09820">arXiv:2411.09820</a> <span> [<a href="https://arxiv.org/pdf/2411.09820">pdf</a>, <a href="https://arxiv.org/format/2411.09820">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> </div> </div> <p class="title is-5 mathjax"> WelQrate: Defining the Gold Standard in Small Molecule Drug Discovery Benchmarking </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yunchao"> Yunchao</a>, <a href="/search/cs?searchtype=author&query=Liu"> Liu</a>, <a href="/search/cs?searchtype=author&query=Dong%2C+H">Ha Dong</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xin Wang</a>, <a href="/search/cs?searchtype=author&query=Moretti%2C+R">Rocco Moretti</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yu Wang</a>, <a href="/search/cs?searchtype=author&query=Su%2C+Z">Zhaoqian Su</a>, <a href="/search/cs?searchtype=author&query=Gu%2C+J">Jiawei Gu</a>, <a href="/search/cs?searchtype=author&query=Bodenheimer%2C+B">Bobby Bodenheimer</a>, <a href="/search/cs?searchtype=author&query=Weaver%2C+C+D">Charles David Weaver</a>, <a href="/search/cs?searchtype=author&query=Meiler%2C+J">Jens Meiler</a>, <a href="/search/cs?searchtype=author&query=Derr%2C+T">Tyler Derr</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.09820v1-abstract-short" style="display: inline;"> While deep learning has revolutionized computer-aided drug discovery, the AI community has predominantly focused on model innovation and placed less emphasis on establishing best benchmarking practices. We posit that without a sound model evaluation framework, the AI community's efforts cannot reach their full potential, thereby slowing the progress and transfer of innovation into real-world drug… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09820v1-abstract-full').style.display = 'inline'; document.getElementById('2411.09820v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.09820v1-abstract-full" style="display: none;"> While deep learning has revolutionized computer-aided drug discovery, the AI community has predominantly focused on model innovation and placed less emphasis on establishing best benchmarking practices. We posit that without a sound model evaluation framework, the AI community's efforts cannot reach their full potential, thereby slowing the progress and transfer of innovation into real-world drug discovery. Thus, in this paper, we seek to establish a new gold standard for small molecule drug discovery benchmarking, WelQrate. Specifically, our contributions are threefold: WelQrate Dataset Collection - we introduce a meticulously curated collection of 9 datasets spanning 5 therapeutic target classes. Our hierarchical curation pipelines, designed by drug discovery experts, go beyond the primary high-throughput screen by leveraging additional confirmatory and counter screens along with rigorous domain-driven preprocessing, such as Pan-Assay Interference Compounds (PAINS) filtering, to ensure the high-quality data in the datasets; WelQrate Evaluation Framework - we propose a standardized model evaluation framework considering high-quality datasets, featurization, 3D conformation generation, evaluation metrics, and data splits, which provides a reliable benchmarking for drug discovery experts conducting real-world virtual screening; Benchmarking - we evaluate model performance through various research questions using the WelQrate dataset collection, exploring the effects of different models, dataset quality, featurization methods, and data splitting strategies on the results. In summary, we recommend adopting our proposed WelQrate as the gold standard in small molecule drug discovery benchmarking. The WelQrate dataset collection, along with the curation codes, and experimental scripts are all publicly available at WelQrate.org. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09820v1-abstract-full').style.display = 'none'; document.getElementById('2411.09820v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">* denotes equal contribution</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.09693">arXiv:2411.09693</a> <span> [<a href="https://arxiv.org/pdf/2411.09693">pdf</a>, <a href="https://arxiv.org/format/2411.09693">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> CropCraft: Inverse Procedural Modeling for 3D Reconstruction of Crop Plants </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhai%2C+A+J">Albert J. Zhai</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xinlei Wang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+K">Kaiyuan Li</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Z">Zhao Jiang</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+J">Junxiong Zhou</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Sheng Wang</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+Z">Zhenong Jin</a>, <a href="/search/cs?searchtype=author&query=Guan%2C+K">Kaiyu Guan</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shenlong Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.09693v1-abstract-short" style="display: inline;"> The ability to automatically build 3D digital twins of plants from images has countless applications in agriculture, environmental science, robotics, and other fields. However, current 3D reconstruction methods fail to recover complete shapes of plants due to heavy occlusion and complex geometries. In this work, we present a novel method for 3D reconstruction of agricultural crops based on optimiz… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09693v1-abstract-full').style.display = 'inline'; document.getElementById('2411.09693v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.09693v1-abstract-full" style="display: none;"> The ability to automatically build 3D digital twins of plants from images has countless applications in agriculture, environmental science, robotics, and other fields. However, current 3D reconstruction methods fail to recover complete shapes of plants due to heavy occlusion and complex geometries. In this work, we present a novel method for 3D reconstruction of agricultural crops based on optimizing a parametric model of plant morphology via inverse procedural modeling. Our method first estimates depth maps by fitting a neural radiance field and then employs Bayesian optimization to estimate plant morphological parameters that result in consistent depth renderings. The resulting 3D model is complete and biologically plausible. We validate our method on a dataset of real images of agricultural fields, and demonstrate that the reconstructions can be used for a variety of monitoring and simulation applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09693v1-abstract-full').style.display = 'none'; document.getElementById('2411.09693v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Preprint</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.09635">arXiv:2411.09635</a> <span> [<a href="https://arxiv.org/pdf/2411.09635">pdf</a>, <a href="https://arxiv.org/ps/2411.09635">ps</a>, <a href="https://arxiv.org/format/2411.09635">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Counterfactual Uncertainty Quantification of Factual Estimand of Efficacy from Before-and-After Treatment Repeated Measures Randomized Controlled Trials </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xingya Wang</a>, <a href="/search/cs?searchtype=author&query=Han%2C+Y">Yang Han</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yushi Liu</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+S">Szu-Yu Tang</a>, <a href="/search/cs?searchtype=author&query=Hsu%2C+J+C">Jason C. Hsu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.09635v1-abstract-short" style="display: inline;"> The ideal estimand for comparing a new treatment $Rx$ with a control $C$ is the $\textit{counterfactual}$ efficacy $Rx:C$, the expected differential outcome between $Rx$ and $C$ if each patient were given $\textit{both}$. While counterfactual $\textit{point estimation}$ from $\textit{factual}$ Randomized Controlled Trials (RCTs) has been available, this article shows $\textit{counterfactual}$ unce… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09635v1-abstract-full').style.display = 'inline'; document.getElementById('2411.09635v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.09635v1-abstract-full" style="display: none;"> The ideal estimand for comparing a new treatment $Rx$ with a control $C$ is the $\textit{counterfactual}$ efficacy $Rx:C$, the expected differential outcome between $Rx$ and $C$ if each patient were given $\textit{both}$. While counterfactual $\textit{point estimation}$ from $\textit{factual}$ Randomized Controlled Trials (RCTs) has been available, this article shows $\textit{counterfactual}$ uncertainty quantification (CUQ), quantifying uncertainty for factual point estimates but in a counterfactual setting, is surprisingly achievable. We achieve CUQ whose variability is typically smaller than factual UQ, by creating a new statistical modeling principle called ETZ which is applicable to RCTs with $\textit{Before-and-After}$ treatment Repeated Measures, common in many therapeutic areas. We urge caution when estimate of the unobservable true condition of a patient before treatment has measurement error, because that violation of standard regression assumption can cause attenuation in estimating treatment effects. Fortunately, we prove that, for traditional medicine in general, and for targeted therapy with efficacy defined as averaged over the population, counterfactual point estimation is unbiased. However, for targeted therapy, both Real Human and Digital Twins approaches should respect this limitation, lest predicted treatment effect in $\textit{subgroups}$ will have bias. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09635v1-abstract-full').style.display = 'none'; document.getElementById('2411.09635v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">39 pages, 7 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.09451">arXiv:2411.09451</a> <span> [<a href="https://arxiv.org/pdf/2411.09451">pdf</a>, <a href="https://arxiv.org/format/2411.09451">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> DiffRoad: Realistic and Diverse Road Scenario Generation for Autonomous Vehicle Testing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhou%2C+J">Junjie Zhou</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+L">Lin Wang</a>, <a href="/search/cs?searchtype=author&query=Meng%2C+Q">Qiang Meng</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xiaofan Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.09451v1-abstract-short" style="display: inline;"> Generating realistic and diverse road scenarios is essential for autonomous vehicle testing and validation. Nevertheless, owing to the complexity and variability of real-world road environments, creating authentic and varied scenarios for intelligent driving testing is challenging. In this paper, we propose DiffRoad, a novel diffusion model designed to produce controllable and high-fidelity 3D roa… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09451v1-abstract-full').style.display = 'inline'; document.getElementById('2411.09451v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.09451v1-abstract-full" style="display: none;"> Generating realistic and diverse road scenarios is essential for autonomous vehicle testing and validation. Nevertheless, owing to the complexity and variability of real-world road environments, creating authentic and varied scenarios for intelligent driving testing is challenging. In this paper, we propose DiffRoad, a novel diffusion model designed to produce controllable and high-fidelity 3D road scenarios. DiffRoad leverages the generative capabilities of diffusion models to synthesize road layouts from white noise through an inverse denoising process, preserving real-world spatial features. To enhance the quality of generated scenarios, we design the Road-UNet architecture, optimizing the balance between backbone and skip connections for high-realism scenario generation. Furthermore, we introduce a road scenario evaluation module that screens adequate and reasonable scenarios for intelligent driving testing using two critical metrics: road continuity and road reasonableness. Experimental results on multiple real-world datasets demonstrate DiffRoad's ability to generate realistic and smooth road structures while maintaining the original distribution. Additionally, the generated scenarios can be fully automated into the OpenDRIVE format, facilitating generalized autonomous vehicle simulation testing. DiffRoad provides a rich and diverse scenario library for large-scale autonomous vehicle testing and offers valuable insights for future infrastructure designs that are better suited for autonomous vehicles. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09451v1-abstract-full').style.display = 'none'; document.getElementById('2411.09451v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages, 9 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.09429">arXiv:2411.09429</a> <span> [<a href="https://arxiv.org/pdf/2411.09429">pdf</a>, <a href="https://arxiv.org/format/2411.09429">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Materials Science">cond-mat.mtrl-sci</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Superconductivity">cond-mat.supr-con</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> AI-driven inverse design of materials: Past, present and future </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Han%2C+X">Xiao-Qi Han</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xin-De Wang</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+M">Meng-Yuan Xu</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+Z">Zhen Feng</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+B">Bo-Wen Yao</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+P">Peng-Jie Guo</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+Z">Ze-Feng Gao</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+Z">Zhong-Yi Lu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.09429v1-abstract-short" style="display: inline;"> The discovery of advanced materials is the cornerstone of human technological development and progress. The structures of materials and their corresponding properties are essentially the result of a complex interplay of multiple degrees of freedom such as lattice, charge, spin, symmetry, and topology. This poses significant challenges for the inverse design methods of materials. Humans have long e… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09429v1-abstract-full').style.display = 'inline'; document.getElementById('2411.09429v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.09429v1-abstract-full" style="display: none;"> The discovery of advanced materials is the cornerstone of human technological development and progress. The structures of materials and their corresponding properties are essentially the result of a complex interplay of multiple degrees of freedom such as lattice, charge, spin, symmetry, and topology. This poses significant challenges for the inverse design methods of materials. Humans have long explored new materials through a large number of experiments and proposed corresponding theoretical systems to predict new material properties and structures. With the improvement of computational power, researchers have gradually developed various electronic structure calculation methods, particularly such as the one based density functional theory, as well as high-throughput computational methods. Recently, the rapid development of artificial intelligence technology in the field of computer science has enabled the effective characterization of the implicit association between material properties and structures, thus opening up an efficient paradigm for the inverse design of functional materials. A significant progress has been made in inverse design of materials based on generative and discriminative models, attracting widespread attention from researchers. Considering this rapid technological progress, in this survey, we look back on the latest advancements in AI-driven inverse design of materials by introducing the background, key findings, and mainstream technological development routes. In addition, we summarize the remaining issues for future directions. This survey provides the latest overview of AI-driven inverse design of materials, which can serve as a useful resource for researchers. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09429v1-abstract-full').style.display = 'none'; document.getElementById('2411.09429v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">43 pages, 5 figures, 2 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.09308">arXiv:2411.09308</a> <span> [<a href="https://arxiv.org/pdf/2411.09308">pdf</a>, <a href="https://arxiv.org/format/2411.09308">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> DT-JRD: Deep Transformer based Just Recognizable Difference Prediction Model for Video Coding for Machines </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+J">Junqi Liu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yun Zhang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xiaoqi Wang</a>, <a href="/search/cs?searchtype=author&query=Long%2C+X">Xu Long</a>, <a href="/search/cs?searchtype=author&query=Kwong%2C+S">Sam Kwong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.09308v1-abstract-short" style="display: inline;"> Just Recognizable Difference (JRD) represents the minimum visual difference that is detectable by machine vision, which can be exploited to promote machine vision oriented visual signal processing. In this paper, we propose a Deep Transformer based JRD (DT-JRD) prediction model for Video Coding for Machines (VCM), where the accurately predicted JRD can be used reduce the coding bit rate while main… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09308v1-abstract-full').style.display = 'inline'; document.getElementById('2411.09308v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.09308v1-abstract-full" style="display: none;"> Just Recognizable Difference (JRD) represents the minimum visual difference that is detectable by machine vision, which can be exploited to promote machine vision oriented visual signal processing. In this paper, we propose a Deep Transformer based JRD (DT-JRD) prediction model for Video Coding for Machines (VCM), where the accurately predicted JRD can be used reduce the coding bit rate while maintaining the accuracy of machine tasks. Firstly, we model the JRD prediction as a multi-class classification and propose a DT-JRD prediction model that integrates an improved embedding, a content and distortion feature extraction, a multi-class classification and a novel learning strategy. Secondly, inspired by the perception property that machine vision exhibits a similar response to distortions near JRD, we propose an asymptotic JRD loss by using Gaussian Distribution-based Soft Labels (GDSL), which significantly extends the number of training labels and relaxes classification boundaries. Finally, we propose a DT-JRD based VCM to reduce the coding bits while maintaining the accuracy of object detection. Extensive experimental results demonstrate that the mean absolute error of the predicted JRD by the DT-JRD is 5.574, outperforming the state-of-the-art JRD prediction model by 13.1%. Coding experiments shows that comparing with the VVC, the DT-JRD based VCM achieves an average of 29.58% bit rate reduction while maintaining the object detection accuracy. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09308v1-abstract-full').style.display = 'none'; document.getElementById('2411.09308v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Submitted to IEEE Transactions on Multimedia</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.09159">arXiv:2411.09159</a> <span> [<a href="https://arxiv.org/pdf/2411.09159">pdf</a>, <a href="https://arxiv.org/format/2411.09159">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> </div> </div> <p class="title is-5 mathjax"> PIMCOMP: An End-to-End DNN Compiler for Processing-In-Memory Accelerators </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sun%2C+X">Xiaotian Sun</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xinyu Wang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+W">Wanqian Li</a>, <a href="/search/cs?searchtype=author&query=Han%2C+Y">Yinhe Han</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xiaoming Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.09159v1-abstract-short" style="display: inline;"> Various processing-in-memory (PIM) accelerators based on various devices, micro-architectures, and interfaces have been proposed to accelerate deep neural networks (DNNs). How to deploy DNNs onto PIM-based accelerators is the key to explore PIM's high performance and energy efficiency. The scale of DNN models, the diversity of PIM accelerators, and the complexity of deployment are far beyond the h… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09159v1-abstract-full').style.display = 'inline'; document.getElementById('2411.09159v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.09159v1-abstract-full" style="display: none;"> Various processing-in-memory (PIM) accelerators based on various devices, micro-architectures, and interfaces have been proposed to accelerate deep neural networks (DNNs). How to deploy DNNs onto PIM-based accelerators is the key to explore PIM's high performance and energy efficiency. The scale of DNN models, the diversity of PIM accelerators, and the complexity of deployment are far beyond the human deployment capability. Hence, an automatic deployment methodology is indispensable. In this work, we propose PIMCOMP, an end-to-end DNN compiler tailored for PIM accelerators, achieving efficient deployment of DNN models on PIM hardware. PIMCOMP can adapt to various PIM architectures by using an abstract configurable PIM accelerator template with a set of pseudo-instructions, which is a high-level abstraction of the hardware's fundamental functionalities. Through a generic multi-level optimization framework, PIMCOMP realizes an end-to-end conversion from a high-level DNN description to pseudo-instructions, which can be further converted to specific hardware intrinsics/primitives. The compilation addresses two critical issues in PIM-accelerated inference from a system perspective: resource utilization and dataflow scheduling. PIMCOMP adopts a flexible unfolding format to reshape and partition convolutional layers, adopts a weight-layout guided computation-storage-mapping approach to enhance resource utilization, and balances the system's computation, memory access, and communication characteristics. For dataflow scheduling, we design two scheduling algorithms with different inter-layer pipeline granularities to support varying application scenarios while ensuring high computational parallelism. Experiments demonstrate that PIMCOMP improves throughput, latency, and energy efficiency across various architectures. PIMCOMP is open-sourced at \url{https://github.com/sunxt99/PIMCOMP-NN}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09159v1-abstract-full').style.display = 'none'; document.getElementById('2411.09159v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.08972">arXiv:2411.08972</a> <span> [<a href="https://arxiv.org/pdf/2411.08972">pdf</a>, <a href="https://arxiv.org/format/2411.08972">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Geometry">cs.CG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> </div> </div> <p class="title is-5 mathjax"> Designing Automated Market Makers for Combinatorial Securities: A Geometric Viewpoint </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hossain%2C+P+S">Prommy Sultana Hossain</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xintong Wang</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+F">Fang-Yi Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.08972v1-abstract-short" style="display: inline;"> Designing automated market makers (AMMs) for prediction markets on combinatorial securities over large outcome spaces poses significant computational challenges. Prior research has primarily focused on combinatorial prediction markets within specific set systems (e.g., intervals, permutations). We introduce a framework for designing AMMs on arbitrary set systems by building a novel connection to t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08972v1-abstract-full').style.display = 'inline'; document.getElementById('2411.08972v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.08972v1-abstract-full" style="display: none;"> Designing automated market makers (AMMs) for prediction markets on combinatorial securities over large outcome spaces poses significant computational challenges. Prior research has primarily focused on combinatorial prediction markets within specific set systems (e.g., intervals, permutations). We introduce a framework for designing AMMs on arbitrary set systems by building a novel connection to the range query problem in computational geometry. This connection enables the analysis of computational complexity and the design of efficient AMMs. We first demonstrate the equivalence between price queries and trade updates under the popular combinatorial logarithmic market scoring rule market and the range query and range update problem. Building on this equivalence, we construct sublinear time algorithms when the VC dimension of the set system is bounded and show the non-existence of such algorithms for unbounded VC dimension cases. We then extend this approach to AMMs for combinatorial prediction markets with quadratic and power scoring rules. Finally, we show that the multi-resolution market design can be naturally integrated into the partition-tree scheme. Additionally, we introduce the combinatorial swap operation problem for automated market makers in decentralized finance and show that it can be efficiently reduced to range update problems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08972v1-abstract-full').style.display = 'none'; document.getElementById('2411.08972v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">38 pages, 1 figure, accepted at SODA'25</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.08794">arXiv:2411.08794</a> <span> [<a href="https://arxiv.org/pdf/2411.08794">pdf</a>, <a href="https://arxiv.org/format/2411.08794">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Evaluating World Models with LLM for Decision Making </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yang%2C+C">Chang Yang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xinrun Wang</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+J">Junzhe Jiang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Q">Qinggang Zhang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+X">Xiao Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.08794v1-abstract-short" style="display: inline;"> World model emerges as a key module in decision making, where MuZero and Dreamer achieve remarkable successes in complex tasks. Recent work leverages Large Language Models (LLMs) as general world simulators to simulate the dynamics of the world due to their generalizability. LLMs also serve as the world model for deliberative reasoning in Reasoning via Planning (RAP) and Tree of Thought (ToT). How… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08794v1-abstract-full').style.display = 'inline'; document.getElementById('2411.08794v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.08794v1-abstract-full" style="display: none;"> World model emerges as a key module in decision making, where MuZero and Dreamer achieve remarkable successes in complex tasks. Recent work leverages Large Language Models (LLMs) as general world simulators to simulate the dynamics of the world due to their generalizability. LLMs also serve as the world model for deliberative reasoning in Reasoning via Planning (RAP) and Tree of Thought (ToT). However, the world models are either evaluated as a general world simulator, or as a functional module of the agent, i.e., predicting the transitions to assist the planning. In this work, we propose a comprehensive evaluation of the world models with LLMs from the decision making perspective. Specifically, we leverage the 31 diverse environments from (Wang et al., 2023;2024) and curate the rule-based policy of each environment for the diverse evaluation. Then, we design three main tasks, i.e., policy verification, action proposal, and policy planning, where the world models can be used for decision making solely. Finally, we conduct the comprehensive evaluation of the advanced LLMs, i.e., GPT-4o and GPT-4o-mini, on the environments for the three main tasks under various settings. The key observations include: i) GPT-4o significantly outperforms GPT-4o-mini on the three main tasks, especially for the tasks which require the domain knowledge, ii) the performance of the world model with LLM will be decreased for long-term decision-making tasks, and iii) the combination of different functionalities of the world model will brings additional unstabilities of the performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08794v1-abstract-full').style.display = 'none'; document.getElementById('2411.08794v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.08703">arXiv:2411.08703</a> <span> [<a href="https://arxiv.org/pdf/2411.08703">pdf</a>, <a href="https://arxiv.org/format/2411.08703">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> MVKTrans: Multi-View Knowledge Transfer for Robust Multiomics Classification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cong%2C+S">Shan Cong</a>, <a href="/search/cs?searchtype=author&query=Sang%2C+Z">Zhiling Sang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+H">Hongwei Liu</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Haoran Luo</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xin Wang</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+H">Hong Liang</a>, <a href="/search/cs?searchtype=author&query=Hao%2C+J">Jie Hao</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xiaohui Yao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.08703v1-abstract-short" style="display: inline;"> The distinct characteristics of multiomics data, including complex interactions within and across biological layers and disease heterogeneity (e.g., heterogeneity in etiology and clinical symptoms), drive us to develop novel designs to address unique challenges in multiomics prediction. In this paper, we propose the multi-view knowledge transfer learning (MVKTrans) framework, which transfers intra… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08703v1-abstract-full').style.display = 'inline'; document.getElementById('2411.08703v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.08703v1-abstract-full" style="display: none;"> The distinct characteristics of multiomics data, including complex interactions within and across biological layers and disease heterogeneity (e.g., heterogeneity in etiology and clinical symptoms), drive us to develop novel designs to address unique challenges in multiomics prediction. In this paper, we propose the multi-view knowledge transfer learning (MVKTrans) framework, which transfers intra- and inter-omics knowledge in an adaptive manner by reviewing data heterogeneity and suppressing bias transfer, thereby enhancing classification performance. Specifically, we design a graph contrastive module that is trained on unlabeled data to effectively learn and transfer the underlying intra-omics patterns to the supervised task. This unsupervised pretraining promotes learning general and unbiased representations for each modality, regardless of the downstream tasks. In light of the varying discriminative capacities of modalities across different diseases and/or samples, we introduce an adaptive and bi-directional cross-omics distillation module. This module automatically identifies richer modalities and facilitates dynamic knowledge transfer from more informative to less informative omics, thereby enabling a more robust and generalized integration. Extensive experiments on four real biomedical datasets demonstrate the superior performance and robustness of MVKTrans compared to the state-of-the-art. Code and data are available at https://github.com/Yaolab-fantastic/MVKTrans. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08703v1-abstract-full').style.display = 'none'; document.getElementById('2411.08703v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.08641">arXiv:2411.08641</a> <span> [<a href="https://arxiv.org/pdf/2411.08641">pdf</a>, <a href="https://arxiv.org/format/2411.08641">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> DipMe: Haptic Recognition of Granular Media for Tangible Interactive Applications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xinkai Wang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Shuo Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+Z">Ziyi Zhao</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+L">Lifeng Zhu</a>, <a href="/search/cs?searchtype=author&query=Song%2C+A">Aiguo Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.08641v1-abstract-short" style="display: inline;"> While tangible user interface has shown its power in naturally interacting with rigid or soft objects, users cannot conveniently use different types of granular materials as the interaction media. We introduce DipMe as a smart device to recognize the types of granular media in real time, which can be used to connect the granular materials in the physical world with various virtual content. Other t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08641v1-abstract-full').style.display = 'inline'; document.getElementById('2411.08641v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.08641v1-abstract-full" style="display: none;"> While tangible user interface has shown its power in naturally interacting with rigid or soft objects, users cannot conveniently use different types of granular materials as the interaction media. We introduce DipMe as a smart device to recognize the types of granular media in real time, which can be used to connect the granular materials in the physical world with various virtual content. Other than vision-based solutions, we propose a dip operation of our device and exploit the haptic signals to recognize different types of granular materials. With modern machine learning tools, we find the haptic signals from different granular media are distinguishable by DipMe. With the online granular object recognition, we build several tangible interactive applications, demonstrating the effects of DipMe in perceiving granular materials and its potential in developing a tangible user interface with granular objects as the new media. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08641v1-abstract-full').style.display = 'none'; document.getElementById('2411.08641v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages, 10 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.08433">arXiv:2411.08433</a> <span> [<a href="https://arxiv.org/pdf/2411.08433">pdf</a>, <a href="https://arxiv.org/format/2411.08433">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> 3D Multi-Object Tracking with Semi-Supervised GRU-Kalman Filter </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xiaoxiang Wang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jiaxin Liu</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+M">Miaojie Feng</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zhaoxing Zhang</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+X">Xin Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.08433v1-abstract-short" style="display: inline;"> 3D Multi-Object Tracking (MOT), a fundamental component of environmental perception, is essential for intelligent systems like autonomous driving and robotic sensing. Although Tracking-by-Detection frameworks have demonstrated excellent performance in recent years, their application in real-world scenarios faces significant challenges. Object movement in complex environments is often highly nonlin… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08433v1-abstract-full').style.display = 'inline'; document.getElementById('2411.08433v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.08433v1-abstract-full" style="display: none;"> 3D Multi-Object Tracking (MOT), a fundamental component of environmental perception, is essential for intelligent systems like autonomous driving and robotic sensing. Although Tracking-by-Detection frameworks have demonstrated excellent performance in recent years, their application in real-world scenarios faces significant challenges. Object movement in complex environments is often highly nonlinear, while existing methods typically rely on linear approximations of motion. Furthermore, system noise is frequently modeled as a Gaussian distribution, which fails to capture the true complexity of the noise dynamics. These oversimplified modeling assumptions can lead to significant reductions in tracking precision. To address this, we propose a GRU-based MOT method, which introduces a learnable Kalman filter into the motion module. This approach is able to learn object motion characteristics through data-driven learning, thereby avoiding the need for manual model design and model error. At the same time, to avoid abnormal supervision caused by the wrong association between annotations and trajectories, we design a semi-supervised learning strategy to accelerate the convergence speed and improve the robustness of the model. Evaluation experiment on the nuScenes and Argoverse2 datasets demonstrates that our system exhibits superior performance and significant potential compared to traditional TBD methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08433v1-abstract-full').style.display = 'none'; document.getElementById('2411.08433v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.08380">arXiv:2411.08380</a> <span> [<a href="https://arxiv.org/pdf/2411.08380">pdf</a>, <a href="https://arxiv.org/format/2411.08380">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> EgoVid-5M: A Large-Scale Video-Action Dataset for Egocentric Video Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xiaofeng Wang</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+K">Kang Zhao</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+F">Feng Liu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jiayu Wang</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+G">Guosheng Zhao</a>, <a href="/search/cs?searchtype=author&query=Bao%2C+X">Xiaoyi Bao</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Z">Zheng Zhu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yingya Zhang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xingang Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.08380v1-abstract-short" style="display: inline;"> Video generation has emerged as a promising tool for world simulation, leveraging visual data to replicate real-world environments. Within this context, egocentric video generation, which centers on the human perspective, holds significant potential for enhancing applications in virtual reality, augmented reality, and gaming. However, the generation of egocentric videos presents substantial challe… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08380v1-abstract-full').style.display = 'inline'; document.getElementById('2411.08380v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.08380v1-abstract-full" style="display: none;"> Video generation has emerged as a promising tool for world simulation, leveraging visual data to replicate real-world environments. Within this context, egocentric video generation, which centers on the human perspective, holds significant potential for enhancing applications in virtual reality, augmented reality, and gaming. However, the generation of egocentric videos presents substantial challenges due to the dynamic nature of egocentric viewpoints, the intricate diversity of actions, and the complex variety of scenes encountered. Existing datasets are inadequate for addressing these challenges effectively. To bridge this gap, we present EgoVid-5M, the first high-quality dataset specifically curated for egocentric video generation. EgoVid-5M encompasses 5 million egocentric video clips and is enriched with detailed action annotations, including fine-grained kinematic control and high-level textual descriptions. To ensure the integrity and usability of the dataset, we implement a sophisticated data cleaning pipeline designed to maintain frame consistency, action coherence, and motion smoothness under egocentric conditions. Furthermore, we introduce EgoDreamer, which is capable of generating egocentric videos driven simultaneously by action descriptions and kinematic control signals. The EgoVid-5M dataset, associated action annotations, and all data cleansing metadata will be released for the advancement of research in egocentric video generation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08380v1-abstract-full').style.display = 'none'; document.getElementById('2411.08380v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Project Page: https://egovid.github.io/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.08320">arXiv:2411.08320</a> <span> [<a href="https://arxiv.org/pdf/2411.08320">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Responsible AI in Construction Safety: Systematic Evaluation of Large Language Models and Prompt Engineering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sammour%2C+F">Farouq Sammour</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+J">Jia Xu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xi Wang</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+M">Mo Hu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zhenyu Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.08320v1-abstract-short" style="display: inline;"> Construction remains one of the most hazardous sectors. Recent advancements in AI, particularly Large Language Models (LLMs), offer promising opportunities for enhancing workplace safety. However, responsible integration of LLMs requires systematic evaluation, as deploying them without understanding their capabilities and limitations risks generating inaccurate information, fostering misplaced con… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08320v1-abstract-full').style.display = 'inline'; document.getElementById('2411.08320v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.08320v1-abstract-full" style="display: none;"> Construction remains one of the most hazardous sectors. Recent advancements in AI, particularly Large Language Models (LLMs), offer promising opportunities for enhancing workplace safety. However, responsible integration of LLMs requires systematic evaluation, as deploying them without understanding their capabilities and limitations risks generating inaccurate information, fostering misplaced confidence, and compromising worker safety. This study evaluates the performance of two widely used LLMs, GPT-3.5 and GPT-4o, across three standardized exams administered by the Board of Certified Safety Professionals (BCSP). Using 385 questions spanning seven safety knowledge areas, the study analyzes the models' accuracy, consistency, and reliability. Results show that both models consistently exceed the BCSP benchmark, with GPT-4o achieving an accuracy rate of 84.6% and GPT-3.5 reaching 73.8%. Both models demonstrate strengths in safety management systems and hazard identification and control, but exhibit weaknesses in science, mathematics, emergency response, and fire prevention. An error analysis identifies four primary limitations affecting LLM performance: lack of knowledge, reasoning flaws, memory issues, and calculation errors. Our study also highlights the impact of prompt engineering strategies, with variations in accuracy reaching 13.5% for GPT-3.5 and 7.9% for GPT-4o. However, no single prompt configuration proves universally effective. This research advances knowledge in three ways: by identifying areas where LLMs can support safety practices and where human oversight remains essential, by offering practical insights into improving LLM implementation through prompt engineering, and by providing evidence-based direction for future research and development. These contributions support the responsible integration of AI in construction safety management toward achieving zero injuries. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08320v1-abstract-full').style.display = 'none'; document.getElementById('2411.08320v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">29 pages, 5 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.08312">arXiv:2411.08312</a> <span> [<a href="https://arxiv.org/pdf/2411.08312">pdf</a>, <a href="https://arxiv.org/format/2411.08312">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> </div> </div> <p class="title is-5 mathjax"> A Novel Extensible Simulation Framework for CXL-Enabled Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=An%2C+Y">Yuda An</a>, <a href="/search/cs?searchtype=author&query=Yi%2C+S">Shushu Yi</a>, <a href="/search/cs?searchtype=author&query=Mao%2C+B">Bo Mao</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Q">Qiao Li</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+M">Mingzhe Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+K">Ke Zhou</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+N">Nong Xiao</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+G">Guangyu Sun</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xiaolin Wang</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+Y">Yingwei Luo</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jie Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.08312v1-abstract-short" style="display: inline;"> Compute Express Link (CXL) serves as a rising industry standard, delivering high-speed cache-coherent links to a variety of devices, including host CPUs, computational accelerators, and memory devices. It is designed to promote system scalability, enable peer-to-peer exchanges, and accelerate data transmissions. To achieve these objectives, the most recent CXL protocol has brought forth several in… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08312v1-abstract-full').style.display = 'inline'; document.getElementById('2411.08312v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.08312v1-abstract-full" style="display: none;"> Compute Express Link (CXL) serves as a rising industry standard, delivering high-speed cache-coherent links to a variety of devices, including host CPUs, computational accelerators, and memory devices. It is designed to promote system scalability, enable peer-to-peer exchanges, and accelerate data transmissions. To achieve these objectives, the most recent CXL protocol has brought forth several innovative features, such as port-focused routing, device-handled coherence, and PCIe 6.0 compatibility. However, due to the limited availability of hardware prototypes and simulators compatible with CXL, earlier CXL research has largely depended on emulating CXL devices using remote NUMA nodes. Unfortunately, these NUMA-based emulators have difficulties in accurately representing the new features due to fundamental differences in hardware and protocols. Moreover, the absence of support for non-tree topology and PCIe links makes it complex to merely adapt existing simulators for CXL simulation. To overcome these problems, we introduce ESF, a simulation framework specifically designed for CXL systems. ESF has been developed to accurately reflect the unique features of the latest CXL protocol from the ground up. It uses a specialized interconnect layer to facilitate connections within a wide range of system topologies and also includes key components to carry out specific functions required by these features. By utilizing ESF, we thoroughly investigate various aspects of CXL systems, including system topology, device-handled coherence, and the effects of PCIe characteristics, leading to important findings that can guide the creation of high-performance CXL systems. The ESF source codes are fully open-source and can be accessed at https://anonymous.4open.science/r/ESF-1CE3. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08312v1-abstract-full').style.display = 'none'; document.getElementById('2411.08312v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.08167">arXiv:2411.08167</a> <span> [<a href="https://arxiv.org/pdf/2411.08167">pdf</a>, <a href="https://arxiv.org/ps/2411.08167">ps</a>, <a href="https://arxiv.org/format/2411.08167">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Multi-Agent Stochastic Bandits Robust to Adversarial Corruptions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ghaffari%2C+F">Fatemeh Ghaffari</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xuchuang Wang</a>, <a href="/search/cs?searchtype=author&query=Zuo%2C+J">Jinhang Zuo</a>, <a href="/search/cs?searchtype=author&query=Hajiesmaili%2C+M">Mohammad Hajiesmaili</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.08167v1-abstract-short" style="display: inline;"> We study the problem of multi-agent multi-armed bandits with adversarial corruption in a heterogeneous setting, where each agent accesses a subset of arms. The adversary can corrupt the reward observations for all agents. Agents share these corrupted rewards with each other, and the objective is to maximize the cumulative total reward of all agents (and not be misled by the adversary). We propose… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08167v1-abstract-full').style.display = 'inline'; document.getElementById('2411.08167v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.08167v1-abstract-full" style="display: none;"> We study the problem of multi-agent multi-armed bandits with adversarial corruption in a heterogeneous setting, where each agent accesses a subset of arms. The adversary can corrupt the reward observations for all agents. Agents share these corrupted rewards with each other, and the objective is to maximize the cumulative total reward of all agents (and not be misled by the adversary). We propose a multi-agent cooperative learning algorithm that is robust to adversarial corruptions. For this newly devised algorithm, we demonstrate that an adversary with an unknown corruption budget $C$ only incurs an additive $O((L / L_{\min}) C)$ term to the standard regret of the model in non-corruption settings, where $L$ is the total number of agents, and $L_{\min}$ is the minimum number of agents with mutual access to an arm. As a side-product, our algorithm also improves the state-of-the-art regret bounds when reducing to both the single-agent and homogeneous multi-agent scenarios, tightening multiplicative $K$ (the number of arms) and $L$ (the number of agents) factors, respectively. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08167v1-abstract-full').style.display = 'none'; document.getElementById('2411.08167v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.07711">arXiv:2411.07711</a> <span> [<a href="https://arxiv.org/pdf/2411.07711">pdf</a>, <a href="https://arxiv.org/format/2411.07711">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> OWLed: Outlier-weighed Layerwise Pruning for Efficient Autonomous Driving Framework </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+J">Jiaxi Li</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+L">Lu Yin</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xilu Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.07711v1-abstract-short" style="display: inline;"> The integration of Large Language Models (LLMs) into autonomous driving systems offers promising enhancements in environmental understanding and decision-making. However, the substantial computational demands of deploying LLMs locally on vehicles render this approach unfeasible for real-world automotive applications. To address this challenge, we introduce OWLed, the Outlier-Weighed Layerwise Prun… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07711v1-abstract-full').style.display = 'inline'; document.getElementById('2411.07711v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.07711v1-abstract-full" style="display: none;"> The integration of Large Language Models (LLMs) into autonomous driving systems offers promising enhancements in environmental understanding and decision-making. However, the substantial computational demands of deploying LLMs locally on vehicles render this approach unfeasible for real-world automotive applications. To address this challenge, we introduce OWLed, the Outlier-Weighed Layerwise Pruning for Efficient Autonomous Driving Framework that leverages outlier-weighted layerwise sparsity for model compression. Our method assigns non-uniform sparsity ratios to different layers based on the distribution of outlier features, significantly reducing the model size without the need for fine-tuning. To ensure the compressed model adapts well to autonomous driving tasks, we incorporate driving environment data into both the calibration and pruning processes. Our empirical studies reveal that the encoder component is more sensitive to pruning than the LLM, highlighting its critical role in the system. Experimental results demonstrate that OWLed outperforms existing methods in perception, action prediction, and language understanding while substantially lowering computational requirements. These findings underscore the potential of combining advanced pruning techniques with LLMs to develop efficient and robust autonomous driving systems capable of handling complex scenarios. Code will be made publicly available. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07711v1-abstract-full').style.display = 'none'; document.getElementById('2411.07711v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This work has been submitted to the IEEE for possible publication</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.07506">arXiv:2411.07506</a> <span> [<a href="https://arxiv.org/pdf/2411.07506">pdf</a>, <a href="https://arxiv.org/format/2411.07506">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> FM-TS: Flow Matching for Time Series Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hu%2C+Y">Yang Hu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xiao Wang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+L">Lirong Wu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Huatian Zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+S+Z">Stan Z. Li</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Sheng Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+T">Tianlong Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.07506v1-abstract-short" style="display: inline;"> Time series generation has emerged as an essential tool for analyzing temporal data across numerous fields. While diffusion models have recently gained significant attention in generating high-quality time series, they tend to be computationally demanding and reliant on complex stochastic processes. To address these limitations, we introduce FM-TS, a rectified Flow Matching-based framework for Tim… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07506v1-abstract-full').style.display = 'inline'; document.getElementById('2411.07506v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.07506v1-abstract-full" style="display: none;"> Time series generation has emerged as an essential tool for analyzing temporal data across numerous fields. While diffusion models have recently gained significant attention in generating high-quality time series, they tend to be computationally demanding and reliant on complex stochastic processes. To address these limitations, we introduce FM-TS, a rectified Flow Matching-based framework for Time Series generation, which simplifies the time series generation process by directly optimizing continuous trajectories. This approach avoids the need for iterative sampling or complex noise schedules typically required in diffusion-based models. FM-TS is more efficient in terms of training and inference. Moreover, FM-TS is highly adaptive, supporting both conditional and unconditional time series generation. Notably, through our novel inference design, the model trained in an unconditional setting can seamlessly generalize to conditional tasks without the need for retraining. Extensive benchmarking across both settings demonstrates that FM-TS consistently delivers superior performance compared to existing approaches while being more efficient in terms of training and inference. For instance, in terms of discriminative score, FM-TS achieves 0.005, 0.019, 0.011, 0.005, 0.053, and 0.106 on the Sines, Stocks, ETTh, MuJoCo, Energy, and fMRI unconditional time series datasets, respectively, significantly outperforming the second-best method which achieves 0.006, 0.067, 0.061, 0.008, 0.122, and 0.167 on the same datasets. We have achieved superior performance in solar forecasting and MuJoCo imputation tasks, significantly enhanced by our innovative $t$ power sampling method. The code is available at https://github.com/UNITES-Lab/FMTS. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07506v1-abstract-full').style.display = 'none'; document.getElementById('2411.07506v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.07387">arXiv:2411.07387</a> <span> [<a href="https://arxiv.org/pdf/2411.07387">pdf</a>, <a href="https://arxiv.org/format/2411.07387">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Isochrony-Controlled Speech-to-Text Translation: A study on translating from Sino-Tibetan to Indo-European Languages </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yousefi%2C+M">Midia Yousefi</a>, <a href="/search/cs?searchtype=author&query=Qian%2C+Y">Yao Qian</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+J">Junkun Chen</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+G">Gang Wang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yanqing Liu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+D">Dongmei Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xiaofei Wang</a>, <a href="/search/cs?searchtype=author&query=Xue%2C+J">Jian Xue</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.07387v1-abstract-short" style="display: inline;"> End-to-end speech translation (ST), which translates source language speech directly into target language text, has garnered significant attention in recent years. Many ST applications require strict length control to ensure that the translation duration matches the length of the source audio, including both speech and pause segments. Previous methods often controlled the number of words or charac… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07387v1-abstract-full').style.display = 'inline'; document.getElementById('2411.07387v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.07387v1-abstract-full" style="display: none;"> End-to-end speech translation (ST), which translates source language speech directly into target language text, has garnered significant attention in recent years. Many ST applications require strict length control to ensure that the translation duration matches the length of the source audio, including both speech and pause segments. Previous methods often controlled the number of words or characters generated by the Machine Translation model to approximate the source sentence's length without considering the isochrony of pauses and speech segments, as duration can vary between languages. To address this, we present improvements to the duration alignment component of our sequence-to-sequence ST model. Our method controls translation length by predicting the duration of speech and pauses in conjunction with the translation process. This is achieved by providing timing information to the decoder, ensuring it tracks the remaining duration for speech and pauses while generating the translation. The evaluation on the Zh-En test set of CoVoST 2, demonstrates that the proposed Isochrony-Controlled ST achieves 0.92 speech overlap and 8.9 BLEU, which has only a 1.4 BLEU drop compared to the ST baseline. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07387v1-abstract-full').style.display = 'none'; document.getElementById('2411.07387v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.07271">arXiv:2411.07271</a> <span> [<a href="https://arxiv.org/pdf/2411.07271">pdf</a>, <a href="https://arxiv.org/format/2411.07271">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Probability">math.PR</span> </div> </div> <p class="title is-5 mathjax"> Multi-hop Upstream Preemptive Traffic Signal Control with Deep Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+X">Xiaocan Li</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xiaoyu Wang</a>, <a href="/search/cs?searchtype=author&query=Smirnov%2C+I">Ilia Smirnov</a>, <a href="/search/cs?searchtype=author&query=Sanner%2C+S">Scott Sanner</a>, <a href="/search/cs?searchtype=author&query=Abdulhai%2C+B">Baher Abdulhai</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.07271v1-abstract-short" style="display: inline;"> Traffic signal control is crucial for managing congestion in urban networks. Existing myopic pressure-based control methods focus only on immediate upstream links, leading to suboptimal green time allocation and increased network delays. Effective signal control, however, inherently requires a broader spatial scope, as traffic conditions further upstream can significantly impact traffic at the cur… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07271v1-abstract-full').style.display = 'inline'; document.getElementById('2411.07271v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.07271v1-abstract-full" style="display: none;"> Traffic signal control is crucial for managing congestion in urban networks. Existing myopic pressure-based control methods focus only on immediate upstream links, leading to suboptimal green time allocation and increased network delays. Effective signal control, however, inherently requires a broader spatial scope, as traffic conditions further upstream can significantly impact traffic at the current location. This paper introduces a novel concept based on the Markov chain theory, namely multi-hop upstream pressure, that generalizes the conventional pressure to account for traffic conditions beyond the immediate upstream links. This farsighted and compact metric informs the deep reinforcement learning agent to preemptively clear the present queues, guiding the agent to optimize signal timings with a broader spatial awareness. Simulations on synthetic and realistic (Toronto) scenarios demonstrate controllers utilizing multi-hop upstream pressure significantly reduce overall network delay by prioritizing traffic movements based on a broader understanding of upstream congestion. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07271v1-abstract-full').style.display = 'none'; document.getElementById('2411.07271v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 tables, 12 figures. arXiv admin note: text overlap with arXiv:2409.00753</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.06920">arXiv:2411.06920</a> <span> [<a href="https://arxiv.org/pdf/2411.06920">pdf</a>, <a href="https://arxiv.org/format/2411.06920">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Safe Planner: Empowering Safety Awareness in Large Pre-Trained Models for Robot Task Planning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+S">Siyuan Li</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+Z">Zhe Ma</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+F">Feifan Liu</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+J">Jiani Lu</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+Q">Qinqin Xiao</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+K">Kewu Sun</a>, <a href="/search/cs?searchtype=author&query=Cui%2C+L">Lingfei Cui</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+X">Xirui Yang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+P">Peng Liu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xun Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.06920v1-abstract-short" style="display: inline;"> Robot task planning is an important problem for autonomous robots in long-horizon challenging tasks. As large pre-trained models have demonstrated superior planning ability, recent research investigates utilizing large models to achieve autonomous planning for robots in diverse tasks. However, since the large models are pre-trained with Internet data and lack the knowledge of real task scenes, lar… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06920v1-abstract-full').style.display = 'inline'; document.getElementById('2411.06920v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.06920v1-abstract-full" style="display: none;"> Robot task planning is an important problem for autonomous robots in long-horizon challenging tasks. As large pre-trained models have demonstrated superior planning ability, recent research investigates utilizing large models to achieve autonomous planning for robots in diverse tasks. However, since the large models are pre-trained with Internet data and lack the knowledge of real task scenes, large models as planners may make unsafe decisions that hurt the robots and the surrounding environments. To solve this challenge, we propose a novel Safe Planner framework, which empowers safety awareness in large pre-trained models to accomplish safe and executable planning. In this framework, we develop a safety prediction module to guide the high-level large model planner, and this safety module trained in a simulator can be effectively transferred to real-world tasks. The proposed Safe Planner framework is evaluated on both simulated environments and real robots. The experiment results demonstrate that Safe Planner not only achieves state-of-the-art task success rates, but also substantially improves safety during task execution. The experiment videos are shown in https://sites.google.com/view/safeplanner . <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06920v1-abstract-full').style.display = 'none'; document.getElementById('2411.06920v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages, 6 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.06700">arXiv:2411.06700</a> <span> [<a href="https://arxiv.org/pdf/2411.06700">pdf</a>, <a href="https://arxiv.org/format/2411.06700">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> HomoMatcher: Dense Feature Matching Results with Semi-Dense Efficiency by Homography Estimation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xiaolong Wang</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+L">Lei Yu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yingying Zhang</a>, <a href="/search/cs?searchtype=author&query=Lao%2C+J">Jiangwei Lao</a>, <a href="/search/cs?searchtype=author&query=Ru%2C+L">Lixiang Ru</a>, <a href="/search/cs?searchtype=author&query=Zhong%2C+L">Liheng Zhong</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+J">Jingdong Chen</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yu Zhang</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+M">Ming Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.06700v1-abstract-short" style="display: inline;"> Feature matching between image pairs is a fundamental problem in computer vision that drives many applications, such as SLAM. Recently, semi-dense matching approaches have achieved substantial performance enhancements and established a widely-accepted coarse-to-fine paradigm. However, the majority of existing methods focus on improving coarse feature representation rather than the fine-matching mo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06700v1-abstract-full').style.display = 'inline'; document.getElementById('2411.06700v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.06700v1-abstract-full" style="display: none;"> Feature matching between image pairs is a fundamental problem in computer vision that drives many applications, such as SLAM. Recently, semi-dense matching approaches have achieved substantial performance enhancements and established a widely-accepted coarse-to-fine paradigm. However, the majority of existing methods focus on improving coarse feature representation rather than the fine-matching module. Prior fine-matching techniques, which rely on point-to-patch matching probability expectation or direct regression, often lack precision and do not guarantee the continuity of feature points across sequential images. To address this limitation, this paper concentrates on enhancing the fine-matching module in the semi-dense matching framework. We employ a lightweight and efficient homography estimation network to generate the perspective mapping between patches obtained from coarse matching. This patch-to-patch approach achieves the overall alignment of two patches, resulting in a higher sub-pixel accuracy by incorporating additional constraints. By leveraging the homography estimation between patches, we can achieve a dense matching result with low computational cost. Extensive experiments demonstrate that our method achieves higher accuracy compared to previous semi-dense matchers. Meanwhile, our dense matching results exhibit similar end-point-error accuracy compared to previous dense matchers while maintaining semi-dense efficiency. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06700v1-abstract-full').style.display = 'none'; document.getElementById('2411.06700v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 5 figures, conference under review</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.06405">arXiv:2411.06405</a> <span> [<a href="https://arxiv.org/pdf/2411.06405">pdf</a>, <a href="https://arxiv.org/format/2411.06405">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> </div> </div> <p class="title is-5 mathjax"> Parallel Higher-order Truss Decomposition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+C">Chen Chen</a>, <a href="/search/cs?searchtype=author&query=Qian%2C+J">Jingya Qian</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Hui Luo</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yongye Li</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xiaoyang Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.06405v1-abstract-short" style="display: inline;"> The k-truss model is one of the most important models in cohesive subgraph analysis. The k-truss decomposition problem is to compute the trussness of each edge in a given graph, and has been extensively studied. However, the conventional k-truss model is difficult to characterize the fine-grained hierarchical structures in networks due to the neglect of high order information. To overcome the limi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06405v1-abstract-full').style.display = 'inline'; document.getElementById('2411.06405v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.06405v1-abstract-full" style="display: none;"> The k-truss model is one of the most important models in cohesive subgraph analysis. The k-truss decomposition problem is to compute the trussness of each edge in a given graph, and has been extensively studied. However, the conventional k-truss model is difficult to characterize the fine-grained hierarchical structures in networks due to the neglect of high order information. To overcome the limitation, the higher-order truss model is proposed in the literature. However, the previous solutions only consider non-parallel scenarios. To fill the gap, in this paper, we conduct the first research to study the problem of parallel higher-order truss decomposition. Specifically, a parallel framework is first proposed. Moreover, several optimizations are further developed to accelerate the processing. Finally, experiments over 6 real-world networks are conducted to verify the performance of proposed methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06405v1-abstract-full').style.display = 'none'; document.getElementById('2411.06405v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.06229">arXiv:2411.06229</a> <span> [<a href="https://arxiv.org/pdf/2411.06229">pdf</a>, <a href="https://arxiv.org/format/2411.06229">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Multimodal Contrastive Learning of Urban Space Representations from POI Data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xinglei Wang</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+T">Tao Cheng</a>, <a href="/search/cs?searchtype=author&query=Law%2C+S">Stephen Law</a>, <a href="/search/cs?searchtype=author&query=Zeng%2C+Z">Zichao Zeng</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+L">Lu Yin</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Junyuan Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.06229v1-abstract-short" style="display: inline;"> Existing methods for learning urban space representations from Point-of-Interest (POI) data face several limitations, including issues with geographical delineation, inadequate spatial information modelling, underutilisation of POI semantic attributes, and computational inefficiencies. To address these issues, we propose CaLLiPer (Contrastive Language-Location Pre-training), a novel representation… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06229v1-abstract-full').style.display = 'inline'; document.getElementById('2411.06229v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.06229v1-abstract-full" style="display: none;"> Existing methods for learning urban space representations from Point-of-Interest (POI) data face several limitations, including issues with geographical delineation, inadequate spatial information modelling, underutilisation of POI semantic attributes, and computational inefficiencies. To address these issues, we propose CaLLiPer (Contrastive Language-Location Pre-training), a novel representation learning model that directly embeds continuous urban spaces into vector representations that can capture the spatial and semantic distribution of urban environment. This model leverages a multimodal contrastive learning objective, aligning location embeddings with textual POI descriptions, thereby bypassing the need for complex training corpus construction and negative sampling. We validate CaLLiPer's effectiveness by applying it to learning urban space representations in London, UK, where it demonstrates 5-15% improvement in predictive performance for land use classification and socioeconomic mapping tasks compared to state-of-the-art methods. Visualisations of the learned representations further illustrate our model's advantages in capturing spatial variations in urban semantics with high accuracy and fine resolution. Additionally, CaLLiPer achieves reduced training time, showcasing its efficiency and scalability. This work provides a promising pathway for scalable, semantically rich urban space representation learning that can support the development of geospatial foundation models. The implementation code is available at https://github.com/xlwang233/CaLLiPer. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06229v1-abstract-full').style.display = 'none'; document.getElementById('2411.06229v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">19 pages, 5 figures, 7 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.06207">arXiv:2411.06207</a> <span> [<a href="https://arxiv.org/pdf/2411.06207">pdf</a>, <a href="https://arxiv.org/format/2411.06207">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Exploring Knowledge Boundaries in Large Language Models for Retrieval Judgment </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zhen Zhang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xinyu Wang</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Y">Yong Jiang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Z">Zhuo Chen</a>, <a href="/search/cs?searchtype=author&query=Mu%2C+F">Feiteng Mu</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+M">Mengting Hu</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+P">Pengjun Xie</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+F">Fei Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.06207v1-abstract-short" style="display: inline;"> Large Language Models (LLMs) are increasingly recognized for their practical applications. However, these models often encounter challenges in dynamically changing knowledge, as well as in managing unknown static knowledge. Retrieval-Augmented Generation (RAG) tackles this challenge and has shown a significant impact on LLMs. Actually, we find that the impact of RAG on the question answering capab… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06207v1-abstract-full').style.display = 'inline'; document.getElementById('2411.06207v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.06207v1-abstract-full" style="display: none;"> Large Language Models (LLMs) are increasingly recognized for their practical applications. However, these models often encounter challenges in dynamically changing knowledge, as well as in managing unknown static knowledge. Retrieval-Augmented Generation (RAG) tackles this challenge and has shown a significant impact on LLMs. Actually, we find that the impact of RAG on the question answering capabilities of LLMs can be categorized into three groups: beneficial, neutral, and harmful. By minimizing retrieval requests that yield neutral or harmful results, we can effectively reduce both time and computational costs, while also improving the overall performance of LLMs. This insight motivates us to differentiate between types of questions using certain metrics as indicators, to decrease the retrieval ratio without compromising performance. In our work, we propose a method that is able to identify different types of questions from this view by training a Knowledge Boundary Model (KBM). Experiments conducted on 11 English and Chinese datasets illustrate that the KBM effectively delineates the knowledge boundary, significantly decreasing the proportion of retrievals required for optimal end-to-end performance. Specifically, we evaluate the effectiveness of KBM in three complex scenarios: dynamic knowledge, long-tail static knowledge, and multi-hop problems, as well as its functionality as an external LLM plug-in. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06207v1-abstract-full').style.display = 'none'; document.getElementById('2411.06207v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.05990">arXiv:2411.05990</a> <span> [<a href="https://arxiv.org/pdf/2411.05990">pdf</a>, <a href="https://arxiv.org/format/2411.05990">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> </div> </div> <p class="title is-5 mathjax"> Game-theoretic LLM: Agent Workflow for Negotiation Games </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hua%2C+W">Wenyue Hua</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+O">Ollie Liu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+L">Lingyao Li</a>, <a href="/search/cs?searchtype=author&query=Amayuelas%2C+A">Alfonso Amayuelas</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+J">Julie Chen</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+L">Lucas Jiang</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+M">Mingyu Jin</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+L">Lizhou Fan</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+F">Fei Sun</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+W">William Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xintong Wang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yongfeng Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.05990v2-abstract-short" style="display: inline;"> This paper investigates the rationality of large language models (LLMs) in strategic decision-making contexts, specifically within the framework of game theory. We evaluate several state-of-the-art LLMs across a spectrum of complete-information and incomplete-information games. Our findings reveal that LLMs frequently deviate from rational strategies, particularly as the complexity of the game inc… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05990v2-abstract-full').style.display = 'inline'; document.getElementById('2411.05990v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.05990v2-abstract-full" style="display: none;"> This paper investigates the rationality of large language models (LLMs) in strategic decision-making contexts, specifically within the framework of game theory. We evaluate several state-of-the-art LLMs across a spectrum of complete-information and incomplete-information games. Our findings reveal that LLMs frequently deviate from rational strategies, particularly as the complexity of the game increases with larger payoff matrices or deeper sequential trees. To address these limitations, we design multiple game-theoretic workflows that guide the reasoning and decision-making processes of LLMs. These workflows aim to enhance the models' ability to compute Nash Equilibria and make rational choices, even under conditions of uncertainty and incomplete information. Experimental results demonstrate that the adoption of these workflows significantly improves the rationality and robustness of LLMs in game-theoretic tasks. Specifically, with the workflow, LLMs exhibit marked improvements in identifying optimal strategies, achieving near-optimal allocations in negotiation scenarios, and reducing susceptibility to exploitation during negotiations. Furthermore, we explore the meta-strategic considerations of whether it is rational for agents to adopt such workflows, recognizing that the decision to use or forgo the workflow constitutes a game-theoretic issue in itself. Our research contributes to a deeper understanding of LLMs' decision-making capabilities in strategic contexts and provides insights into enhancing their rationality through structured workflows. The findings have implications for the development of more robust and strategically sound AI agents capable of navigating complex interactive environments. Code and data supporting this study are available at \url{https://github.com/Wenyueh/game_theory}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05990v2-abstract-full').style.display = 'none'; document.getElementById('2411.05990v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">45 pages, 12 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.05346">arXiv:2411.05346</a> <span> [<a href="https://arxiv.org/pdf/2411.05346">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> Reinforcement Learning for Adaptive Resource Scheduling in Complex System Environments </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+P">Pochun Li</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+Y">Yuyang Xiao</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+J">Jinghua Yan</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xuan Li</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xiaoye Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.05346v1-abstract-short" style="display: inline;"> This study presents a novel computer system performance optimization and adaptive workload management scheduling algorithm based on Q-learning. In modern computing environments, characterized by increasing data volumes, task complexity, and dynamic workloads, traditional static scheduling methods such as Round-Robin and Priority Scheduling fail to meet the demands of efficient resource allocation… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05346v1-abstract-full').style.display = 'inline'; document.getElementById('2411.05346v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.05346v1-abstract-full" style="display: none;"> This study presents a novel computer system performance optimization and adaptive workload management scheduling algorithm based on Q-learning. In modern computing environments, characterized by increasing data volumes, task complexity, and dynamic workloads, traditional static scheduling methods such as Round-Robin and Priority Scheduling fail to meet the demands of efficient resource allocation and real-time adaptability. By contrast, Q-learning, a reinforcement learning algorithm, continuously learns from system state changes, enabling dynamic scheduling and resource optimization. Through extensive experiments, the superiority of the proposed approach is demonstrated in both task completion time and resource utilization, outperforming traditional and dynamic resource allocation (DRA) algorithms. These findings are critical as they highlight the potential of intelligent scheduling algorithms based on reinforcement learning to address the growing complexity and unpredictability of computing environments. This research provides a foundation for the integration of AI-driven adaptive scheduling in future large-scale systems, offering a scalable, intelligent solution to enhance system performance, reduce operating costs, and support sustainable energy consumption. The broad applicability of this approach makes it a promising candidate for next-generation computing frameworks, such as edge computing, cloud computing, and the Internet of Things. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05346v1-abstract-full').style.display = 'none'; document.getElementById('2411.05346v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.04740">arXiv:2411.04740</a> <span> [<a href="https://arxiv.org/pdf/2411.04740">pdf</a>, <a href="https://arxiv.org/format/2411.04740">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> Quantum Neural Network Classifier for Cancer Registry System Testing: A Feasibility Study </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xinyi Wang</a>, <a href="/search/cs?searchtype=author&query=Ali%2C+S">Shaukat Ali</a>, <a href="/search/cs?searchtype=author&query=Arcaini%2C+P">Paolo Arcaini</a>, <a href="/search/cs?searchtype=author&query=Veeraragavan%2C+N+R">Narasimha Raghavan Veeraragavan</a>, <a href="/search/cs?searchtype=author&query=Nyg%C3%A5rd%2C+J+F">Jan F. Nyg氓rd</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.04740v1-abstract-short" style="display: inline;"> The Cancer Registry of Norway (CRN) is a part of the Norwegian Institute of Public Health (NIPH) and is tasked with producing statistics on cancer among the Norwegian population. For this task, CRN develops, tests, and evolves a software system called Cancer Registration Support System (CaReSS). It is a complex socio-technical software system that interacts with many entities (e.g., hospitals, med… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04740v1-abstract-full').style.display = 'inline'; document.getElementById('2411.04740v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.04740v1-abstract-full" style="display: none;"> The Cancer Registry of Norway (CRN) is a part of the Norwegian Institute of Public Health (NIPH) and is tasked with producing statistics on cancer among the Norwegian population. For this task, CRN develops, tests, and evolves a software system called Cancer Registration Support System (CaReSS). It is a complex socio-technical software system that interacts with many entities (e.g., hospitals, medical laboratories, and other patient registries) to achieve its task. For cost-effective testing of CaReSS, CRN has employed EvoMaster, an AI-based REST API testing tool combined with an integrated classical machine learning model. Within this context, we propose Qlinical to investigate the feasibility of using, inside EvoMaster, a Quantum Neural Network (QNN) classifier, i.e., a quantum machine learning model, instead of the existing classical machine learning model. Results indicate that Qlinical can achieve performance comparable to that of EvoClass. We further explore the effects of various QNN configurations on performance and offer recommendations for optimal QNN settings for future QNN developers. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04740v1-abstract-full').style.display = 'none'; document.getElementById('2411.04740v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.04704">arXiv:2411.04704</a> <span> [<a href="https://arxiv.org/pdf/2411.04704">pdf</a>, <a href="https://arxiv.org/format/2411.04704">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> Distinguishing LLM-generated from Human-written Code by Contrastive Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xu%2C+X">Xiaodan Xu</a>, <a href="/search/cs?searchtype=author&query=Ni%2C+C">Chao Ni</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+X">Xinrong Guo</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+S">Shaoxuan Liu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xiaoya Wang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+K">Kui Liu</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+X">Xiaohu Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.04704v1-abstract-short" style="display: inline;"> Large language models (LLMs), such as ChatGPT released by OpenAI, have attracted significant attention from both industry and academia due to their demonstrated ability to generate high-quality content for various tasks. Despite the impressive capabilities of LLMs, there are growing concerns regarding their potential risks in various fields, such as news, education, and software engineering. Recen… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04704v1-abstract-full').style.display = 'inline'; document.getElementById('2411.04704v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.04704v1-abstract-full" style="display: none;"> Large language models (LLMs), such as ChatGPT released by OpenAI, have attracted significant attention from both industry and academia due to their demonstrated ability to generate high-quality content for various tasks. Despite the impressive capabilities of LLMs, there are growing concerns regarding their potential risks in various fields, such as news, education, and software engineering. Recently, several commercial and open-source LLM-generated content detectors have been proposed, which, however, are primarily designed for detecting natural language content without considering the specific characteristics of program code. This paper aims to fill this gap by proposing a novel ChatGPT-generated code detector, CodeGPTSensor, based on a contrastive learning framework and a semantic encoder built with UniXcoder. To assess the effectiveness of CodeGPTSensor on differentiating ChatGPT-generated code from human-written code, we first curate a large-scale Human and Machine comparison Corpus (HMCorp), which includes 550K pairs of human-written and ChatGPT-generated code (i.e., 288K Python code pairs and 222K Java code pairs). Based on the HMCorp dataset, our qualitative and quantitative analysis of the characteristics of ChatGPT-generated code reveals the challenge and opportunity of distinguishing ChatGPT-generated code from human-written code with their representative features. Our experimental results indicate that CodeGPTSensor can effectively identify ChatGPT-generated code, outperforming all selected baselines. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04704v1-abstract-full').style.display = 'none'; document.getElementById('2411.04704v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">30 pages, 6 figures, Accepted by TOSEM'24</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.04137">arXiv:2411.04137</a> <span> [<a href="https://arxiv.org/pdf/2411.04137">pdf</a>, <a href="https://arxiv.org/format/2411.04137">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Generative AI Enabled Matching for 6G Multiple Access </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xudong Wang</a>, <a href="/search/cs?searchtype=author&query=Du%2C+H">Hongyang Du</a>, <a href="/search/cs?searchtype=author&query=Niyato%2C+D">Dusit Niyato</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+L">Lijie Zhou</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+L">Lei Feng</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Z">Zhixiang Yang</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+F">Fanqin Zhou</a>, <a href="/search/cs?searchtype=author&query=Li%2C+W">Wenjing Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.04137v1-abstract-short" style="display: inline;"> In wireless networks, applying deep learning models to solve matching problems between different entities has become a mainstream and effective approach. However, the complex network topology in 6G multiple access presents significant challenges for the real-time performance and stability of matching generation. Generative artificial intelligence (GenAI) has demonstrated strong capabilities in gra… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04137v1-abstract-full').style.display = 'inline'; document.getElementById('2411.04137v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.04137v1-abstract-full" style="display: none;"> In wireless networks, applying deep learning models to solve matching problems between different entities has become a mainstream and effective approach. However, the complex network topology in 6G multiple access presents significant challenges for the real-time performance and stability of matching generation. Generative artificial intelligence (GenAI) has demonstrated strong capabilities in graph feature extraction, exploration, and generation, offering potential for graph-structured matching generation. In this paper, we propose a GenAI-enabled matching generation framework to support 6G multiple access. Specifically, we first summarize the classical matching theory, discuss common GenAI models and applications from the perspective of matching generation. Then, we propose a framework based on generative diffusion models (GDMs) that iteratively denoises toward reward maximization to generate a matching strategy that meets specific requirements. Experimental results show that, compared to decision-based AI approaches, our framework can generate more effective matching strategies based on given conditions and predefined rewards, helping to solve complex problems in 6G multiple access, such as task allocation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04137v1-abstract-full').style.display = 'none'; document.getElementById('2411.04137v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages,5 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.04105">arXiv:2411.04105</a> <span> [<a href="https://arxiv.org/pdf/2411.04105">pdf</a>, <a href="https://arxiv.org/format/2411.04105">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> How Transformers Solve Propositional Logic Problems: A Mechanistic Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hong%2C+G+Z">Guan Zhe Hong</a>, <a href="/search/cs?searchtype=author&query=Dikkala%2C+N">Nishanth Dikkala</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+E">Enming Luo</a>, <a href="/search/cs?searchtype=author&query=Rashtchian%2C+C">Cyrus Rashtchian</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xin Wang</a>, <a href="/search/cs?searchtype=author&query=Panigrahy%2C+R">Rina Panigrahy</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.04105v2-abstract-short" style="display: inline;"> Large language models (LLMs) have shown amazing performance on tasks that require planning and reasoning. Motivated by this, we investigate the internal mechanisms that underpin a network's ability to perform complex logical reasoning. We first construct a synthetic propositional logic problem that serves as a concrete test-bed for network training and evaluation. Crucially, this problem demands n… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04105v2-abstract-full').style.display = 'inline'; document.getElementById('2411.04105v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.04105v2-abstract-full" style="display: none;"> Large language models (LLMs) have shown amazing performance on tasks that require planning and reasoning. Motivated by this, we investigate the internal mechanisms that underpin a network's ability to perform complex logical reasoning. We first construct a synthetic propositional logic problem that serves as a concrete test-bed for network training and evaluation. Crucially, this problem demands nontrivial planning to solve, but we can train a small transformer to achieve perfect accuracy. Building on our set-up, we then pursue an understanding of precisely how a three-layer transformer, trained from scratch, solves this problem. We are able to identify certain "planning" and "reasoning" circuits in the network that necessitate cooperation between the attention blocks to implement the desired logic. To expand our findings, we then study a larger model, Mistral 7B. Using activation patching, we characterize internal components that are critical in solving our logic problem. Overall, our work systemically uncovers novel aspects of small and large transformers, and continues the study of how they plan and reason. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04105v2-abstract-full').style.display = 'none'; document.getElementById('2411.04105v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.03865">arXiv:2411.03865</a> <span> [<a href="https://arxiv.org/pdf/2411.03865">pdf</a>, <a href="https://arxiv.org/format/2411.03865">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> </div> </div> <p class="title is-5 mathjax"> AdaSociety: An Adaptive Environment with Social Structures for Multi-Agent Decision-Making </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yizhe Huang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xingbo Wang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+H">Hao Liu</a>, <a href="/search/cs?searchtype=author&query=Kong%2C+F">Fanqi Kong</a>, <a href="/search/cs?searchtype=author&query=Qin%2C+A">Aoyang Qin</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+M">Min Tang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xiaoxi Wang</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+S">Song-Chun Zhu</a>, <a href="/search/cs?searchtype=author&query=Bi%2C+M">Mingjie Bi</a>, <a href="/search/cs?searchtype=author&query=Qi%2C+S">Siyuan Qi</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+X">Xue Feng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.03865v1-abstract-short" style="display: inline;"> Traditional interactive environments limit agents' intelligence growth with fixed tasks. Recently, single-agent environments address this by generating new tasks based on agent actions, enhancing task diversity. We consider the decision-making problem in multi-agent settings, where tasks are further influenced by social connections, affecting rewards and information access. However, existing multi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.03865v1-abstract-full').style.display = 'inline'; document.getElementById('2411.03865v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.03865v1-abstract-full" style="display: none;"> Traditional interactive environments limit agents' intelligence growth with fixed tasks. Recently, single-agent environments address this by generating new tasks based on agent actions, enhancing task diversity. We consider the decision-making problem in multi-agent settings, where tasks are further influenced by social connections, affecting rewards and information access. However, existing multi-agent environments lack a combination of adaptive physical surroundings and social connections, hindering the learning of intelligent behaviors. To address this, we introduce AdaSociety, a customizable multi-agent environment featuring expanding state and action spaces, alongside explicit and alterable social structures. As agents progress, the environment adaptively generates new tasks with social structures for agents to undertake. In AdaSociety, we develop three mini-games showcasing distinct social structures and tasks. Initial results demonstrate that specific social structures can promote both individual and collective benefits, though current reinforcement learning and LLM-based algorithms show limited effectiveness in leveraging social structures to enhance performance. Overall, AdaSociety serves as a valuable research platform for exploring intelligence in diverse physical and social settings. The code is available at https://github.com/bigai-ai/AdaSociety. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.03865v1-abstract-full').style.display = 'none'; document.getElementById('2411.03865v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at NeurIPS D&B 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.03857">arXiv:2411.03857</a> <span> [<a href="https://arxiv.org/pdf/2411.03857">pdf</a>, <a href="https://arxiv.org/format/2411.03857">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Efficient Message Passing Architecture for GCN Training on HBM-based FPGAs with Orthogonal Topology On-Chip Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wu%2C+Q">Qizhe Wu</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+L">Letian Zhao</a>, <a href="/search/cs?searchtype=author&query=Gui%2C+Y">Yuchen Gui</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H+L+X">Huawen Liang Xiaotian Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.03857v1-abstract-short" style="display: inline;"> Graph Convolutional Networks (GCNs) are state-of-the-art deep learning models for representation learning on graphs. However, the efficient training of GCNs is hampered by constraints in memory capacity and bandwidth, compounded by the irregular data flow that results in communication bottlenecks. To address these challenges, we propose a message-passing architecture that leverages NUMA-based memo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.03857v1-abstract-full').style.display = 'inline'; document.getElementById('2411.03857v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.03857v1-abstract-full" style="display: none;"> Graph Convolutional Networks (GCNs) are state-of-the-art deep learning models for representation learning on graphs. However, the efficient training of GCNs is hampered by constraints in memory capacity and bandwidth, compounded by the irregular data flow that results in communication bottlenecks. To address these challenges, we propose a message-passing architecture that leverages NUMA-based memory access properties and employs a parallel multicast routing algorithm based on a 4-D hypercube network within the accelerator for efficient message passing in graphs. Additionally, we have re-engineered the backpropagation algorithm specific to GCNs within our proposed accelerator. This redesign strategically mitigates the memory demands prevalent during the training phase and diminishes the computational overhead associated with the transposition of extensive matrices. Compared to the state-of-the-art HP-GNN architecture we achieved a performance improvement of $1.03\times \sim 1.81\times$. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.03857v1-abstract-full').style.display = 'none'; document.getElementById('2411.03857v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper has been accepted for 2024 ACM/SIGDA International Symposium on Field Programmable Gate Arrays(FPGA'24) as poster</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Wang%2C+X&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Wang%2C+X&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Wang%2C+X&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Wang%2C+X&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Wang%2C+X&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Wang%2C+X&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">…</span></li> </ul> </nav> <div class="is-hidden-tablet">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary">  <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div>   <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div>  </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

CINXE.COM

Search | arXiv e-print repository