CINXE.COM

Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;50 of 183 results for author: <span class="mathjax">Fu, Q</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&amp;query=Fu%2C+Q">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Fu, Q"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Fu%2C+Q&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Fu, Q"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Fu%2C+Q&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Fu%2C+Q&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Fu%2C+Q&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Fu%2C+Q&amp;start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Fu%2C+Q&amp;start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.15910">arXiv:2410.15910</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.15910">pdf</a>, <a href="https://arxiv.org/format/2410.15910">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Diverse Policies Recovering via Pointwise Mutual Information Weighted Imitation Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yang%2C+H">Hanlin Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Yao%2C+J">Jian Yao</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+W">Weiming Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Q">Qing Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Qin%2C+H">Hanmin Qin</a>, <a href="/search/cs?searchtype=author&amp;query=Kong%2C+H">Hansheng Kong</a>, <a href="/search/cs?searchtype=author&amp;query=Tang%2C+K">Kirk Tang</a>, <a href="/search/cs?searchtype=author&amp;query=Xiong%2C+J">Jiechao Xiong</a>, <a href="/search/cs?searchtype=author&amp;query=Yu%2C+C">Chao Yu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+K">Kai Li</a>, <a href="/search/cs?searchtype=author&amp;query=Xing%2C+J">Junliang Xing</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+H">Hongwu Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Zhuo%2C+J">Juchao Zhuo</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+Q">Qiang Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Wei%2C+Y">Yang Wei</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+H">Haobo Fu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.15910v2-abstract-short" style="display: inline;"> Recovering a spectrum of diverse policies from a set of expert trajectories is an important research topic in imitation learning. After determining a latent style for a trajectory, previous diverse policies recovering methods usually employ a vanilla behavioral cloning learning objective conditioned on the latent style, treating each state-action pair in the trajectory with equal importance. Based&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.15910v2-abstract-full').style.display = 'inline'; document.getElementById('2410.15910v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.15910v2-abstract-full" style="display: none;"> Recovering a spectrum of diverse policies from a set of expert trajectories is an important research topic in imitation learning. After determining a latent style for a trajectory, previous diverse policies recovering methods usually employ a vanilla behavioral cloning learning objective conditioned on the latent style, treating each state-action pair in the trajectory with equal importance. Based on an observation that in many scenarios, behavioral styles are often highly relevant with only a subset of state-action pairs, this paper presents a new principled method in diverse polices recovery. In particular, after inferring or assigning a latent style for a trajectory, we enhance the vanilla behavioral cloning by incorporating a weighting mechanism based on pointwise mutual information. This additional weighting reflects the significance of each state-action pair&#39;s contribution to learning the style, thus allowing our method to focus on state-action pairs most representative of that style. We provide theoretical justifications for our new objective, and extensive empirical evaluations confirm the effectiveness of our method in recovering diverse policies from expert data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.15910v2-abstract-full').style.display = 'none'; document.getElementById('2410.15910v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">18 pages, 6 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.14072">arXiv:2410.14072</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.14072">pdf</a>, <a href="https://arxiv.org/format/2410.14072">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Efficient Vision-Language Models by Summarizing Visual Tokens into Compact Registers </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wen%2C+Y">Yuxin Wen</a>, <a href="/search/cs?searchtype=author&amp;query=Cao%2C+Q">Qingqing Cao</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+Q">Qichen Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Mehta%2C+S">Sachin Mehta</a>, <a href="/search/cs?searchtype=author&amp;query=Najibi%2C+M">Mahyar Najibi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.14072v1-abstract-short" style="display: inline;"> Recent advancements in vision-language models (VLMs) have expanded their potential for real-world applications, enabling these models to perform complex reasoning on images. In the widely used fully autoregressive transformer-based models like LLaVA, projected visual tokens are prepended to textual tokens. Oftentimes, visual tokens are significantly more than prompt tokens, resulting in increased&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.14072v1-abstract-full').style.display = 'inline'; document.getElementById('2410.14072v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.14072v1-abstract-full" style="display: none;"> Recent advancements in vision-language models (VLMs) have expanded their potential for real-world applications, enabling these models to perform complex reasoning on images. In the widely used fully autoregressive transformer-based models like LLaVA, projected visual tokens are prepended to textual tokens. Oftentimes, visual tokens are significantly more than prompt tokens, resulting in increased computational overhead during both training and inference. In this paper, we propose Visual Compact Token Registers (Victor), a method that reduces the number of visual tokens by summarizing them into a smaller set of register tokens. Victor adds a few learnable register tokens after the visual tokens and summarizes the visual information into these registers using the first few layers in the language tower of VLMs. After these few layers, all visual tokens are discarded, significantly improving computational efficiency for both training and inference. Notably, our method is easy to implement and requires a small number of new trainable parameters with minimal impact on model performance. In our experiment, with merely 8 visual registers--about 1% of the original tokens--Victor shows less than a 4% accuracy drop while reducing the total training time by 43% and boosting the inference throughput by 3.3X. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.14072v1-abstract-full').style.display = 'none'; document.getElementById('2410.14072v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.19833">arXiv:2409.19833</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.19833">pdf</a>, <a href="https://arxiv.org/format/2409.19833">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> HazyDet: Open-source Benchmark for Drone-view Object Detection with Depth-cues in Hazy Scenes </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Feng%2C+C">Changfeng Feng</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Z">Zhenyuan Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Kou%2C+R">Renke Kou</a>, <a href="/search/cs?searchtype=author&amp;query=Gao%2C+G">Guangwei Gao</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+C">Chunping Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+X">Xiang Li</a>, <a href="/search/cs?searchtype=author&amp;query=Shu%2C+X">Xiangbo Shu</a>, <a href="/search/cs?searchtype=author&amp;query=Dai%2C+Y">Yimian Dai</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+Q">Qiang Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jian Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.19833v1-abstract-short" style="display: inline;"> Drone-based object detection in adverse weather conditions is crucial for enhancing drones&#39; environmental perception, yet it remains largely unexplored due to the lack of relevant benchmarks. To bridge this gap, we introduce HazyDet, a large-scale dataset tailored for drone-based object detection in hazy scenes. It encompasses 383,000 real-world instances, collected from both naturally hazy enviro&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.19833v1-abstract-full').style.display = 'inline'; document.getElementById('2409.19833v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.19833v1-abstract-full" style="display: none;"> Drone-based object detection in adverse weather conditions is crucial for enhancing drones&#39; environmental perception, yet it remains largely unexplored due to the lack of relevant benchmarks. To bridge this gap, we introduce HazyDet, a large-scale dataset tailored for drone-based object detection in hazy scenes. It encompasses 383,000 real-world instances, collected from both naturally hazy environments and normal scenes with synthetically imposed haze effects to simulate adverse weather conditions. By observing the significant variations in object scale and clarity under different depth and haze conditions, we designed a Depth Conditioned Detector (DeCoDet) to incorporate this prior knowledge. DeCoDet features a Multi-scale Depth-aware Detection Head that seamlessly integrates depth perception, with the resulting depth cues harnessed by a dynamic Depth Condition Kernel module. Furthermore, we propose a Scale Invariant Refurbishment Loss to facilitate the learning of robust depth cues from pseudo-labels. Extensive evaluations on the HazyDet dataset demonstrate the flexibility and effectiveness of our method, yielding significant performance improvements. Our dataset and toolkit are available at https://github.com/GrokCV/HazyDet. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.19833v1-abstract-full').style.display = 'none'; document.getElementById('2409.19833v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.09149">arXiv:2409.09149</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.09149">pdf</a>, <a href="https://arxiv.org/format/2409.09149">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Adaptive Multi-Modal Control of Digital Human Hand Synthesis Using a Region-Aware Cycle Loss </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Fu%2C+Q">Qifan Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+X">Xiaohang Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Asad%2C+M">Muhammad Asad</a>, <a href="/search/cs?searchtype=author&amp;query=Oh%2C+C">Changjae Oh</a>, <a href="/search/cs?searchtype=author&amp;query=Yuan%2C+S">Shanxin Yuan</a>, <a href="/search/cs?searchtype=author&amp;query=Slabaugh%2C+G">Gregory Slabaugh</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.09149v1-abstract-short" style="display: inline;"> Diffusion models have shown their remarkable ability to synthesize images, including the generation of humans in specific poses. However, current models face challenges in adequately expressing conditional control for detailed hand pose generation, leading to significant distortion in the hand regions. To tackle this problem, we first curate the How2Sign dataset to provide richer and more accurate&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.09149v1-abstract-full').style.display = 'inline'; document.getElementById('2409.09149v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.09149v1-abstract-full" style="display: none;"> Diffusion models have shown their remarkable ability to synthesize images, including the generation of humans in specific poses. However, current models face challenges in adequately expressing conditional control for detailed hand pose generation, leading to significant distortion in the hand regions. To tackle this problem, we first curate the How2Sign dataset to provide richer and more accurate hand pose annotations. In addition, we introduce adaptive, multi-modal fusion to integrate characters&#39; physical features expressed in different modalities such as skeleton, depth, and surface normal. Furthermore, we propose a novel Region-Aware Cycle Loss (RACL) that enables the diffusion model training to focus on improving the hand region, resulting in improved quality of generated hand gestures. More specifically, the proposed RACL computes a weighted keypoint distance between the full-body pose keypoints from the generated image and the ground truth, to generate higher-quality hand poses while balancing overall pose accuracy. Moreover, we use two hand region metrics, named hand-PSNR and hand-Distance for hand pose generation evaluations. Our experimental evaluations demonstrate the effectiveness of our proposed approach in improving the quality of digital human pose generation using diffusion models, especially the quality of the hand region. The source code is available at https://github.com/fuqifan/Region-Aware-Cycle-Loss. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.09149v1-abstract-full').style.display = 'none'; document.getElementById('2409.09149v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper has been accepted by the ECCV 2024 HANDS workshop</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.03707">arXiv:2409.03707</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.03707">pdf</a>, <a href="https://arxiv.org/format/2409.03707">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> A Different Level Text Protection Mechanism With Differential Privacy </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Fu%2C+Q">Qingwen Fu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.03707v1-abstract-short" style="display: inline;"> The article introduces a method for extracting words of different degrees of importance based on the BERT pre-training model and proves the effectiveness of this method. The article also discusses the impact of maintaining the same perturbation results for words of different importance on the overall text utility. This method can be applied to long text protection. </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.03707v1-abstract-full" style="display: none;"> The article introduces a method for extracting words of different degrees of importance based on the BERT pre-training model and proves the effectiveness of this method. The article also discusses the impact of maintaining the same perturbation results for words of different importance on the overall text utility. This method can be applied to long text protection. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.03707v1-abstract-full').style.display = 'none'; document.getElementById('2409.03707v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.10556">arXiv:2408.10556</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.10556">pdf</a>, <a href="https://arxiv.org/format/2408.10556">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Hokoff: Real Game Dataset from Honor of Kings and its Offline Reinforcement Learning Benchmarks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Qu%2C+Y">Yun Qu</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+B">Boyuan Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Shao%2C+J">Jianzhun Shao</a>, <a href="/search/cs?searchtype=author&amp;query=Jiang%2C+Y">Yuhang Jiang</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+C">Chen Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Ye%2C+Z">Zhenbin Ye</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+L">Lin Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Junfeng Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Lai%2C+L">Lin Lai</a>, <a href="/search/cs?searchtype=author&amp;query=Qin%2C+H">Hongyang Qin</a>, <a href="/search/cs?searchtype=author&amp;query=Deng%2C+M">Minwen Deng</a>, <a href="/search/cs?searchtype=author&amp;query=Zhuo%2C+J">Juchao Zhuo</a>, <a href="/search/cs?searchtype=author&amp;query=Ye%2C+D">Deheng Ye</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+Q">Qiang Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+W">Wei Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+G">Guang Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+L">Lanxiao Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Ji%2C+X">Xiangyang Ji</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.10556v1-abstract-short" style="display: inline;"> The advancement of Offline Reinforcement Learning (RL) and Offline Multi-Agent Reinforcement Learning (MARL) critically depends on the availability of high-quality, pre-collected offline datasets that represent real-world complexities and practical applications. However, existing datasets often fall short in their simplicity and lack of realism. To address this gap, we propose Hokoff, a comprehens&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.10556v1-abstract-full').style.display = 'inline'; document.getElementById('2408.10556v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.10556v1-abstract-full" style="display: none;"> The advancement of Offline Reinforcement Learning (RL) and Offline Multi-Agent Reinforcement Learning (MARL) critically depends on the availability of high-quality, pre-collected offline datasets that represent real-world complexities and practical applications. However, existing datasets often fall short in their simplicity and lack of realism. To address this gap, we propose Hokoff, a comprehensive set of pre-collected datasets that covers both offline RL and offline MARL, accompanied by a robust framework, to facilitate further research. This data is derived from Honor of Kings, a recognized Multiplayer Online Battle Arena (MOBA) game known for its intricate nature, closely resembling real-life situations. Utilizing this framework, we benchmark a variety of offline RL and offline MARL algorithms. We also introduce a novel baseline algorithm tailored for the inherent hierarchical action space of the game. We reveal the incompetency of current offline RL approaches in handling task complexity, generalization and multi-task learning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.10556v1-abstract-full').style.display = 'none'; document.getElementById('2408.10556v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.21075">arXiv:2407.21075</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.21075">pdf</a>, <a href="https://arxiv.org/format/2407.21075">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Apple Intelligence Foundation Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Gunter%2C+T">Tom Gunter</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Z">Zirui Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+C">Chong Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Pang%2C+R">Ruoming Pang</a>, <a href="/search/cs?searchtype=author&amp;query=Narayanan%2C+A">Andy Narayanan</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+A">Aonan Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+B">Bowen Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+C">Chen Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Chiu%2C+C">Chung-Cheng Chiu</a>, <a href="/search/cs?searchtype=author&amp;query=Qiu%2C+D">David Qiu</a>, <a href="/search/cs?searchtype=author&amp;query=Gopinath%2C+D">Deepak Gopinath</a>, <a href="/search/cs?searchtype=author&amp;query=Yap%2C+D+A">Dian Ang Yap</a>, <a href="/search/cs?searchtype=author&amp;query=Yin%2C+D">Dong Yin</a>, <a href="/search/cs?searchtype=author&amp;query=Nan%2C+F">Feng Nan</a>, <a href="/search/cs?searchtype=author&amp;query=Weers%2C+F">Floris Weers</a>, <a href="/search/cs?searchtype=author&amp;query=Yin%2C+G">Guoli Yin</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+H">Haoshuo Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+J">Jianyu Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Lu%2C+J">Jiarui Lu</a>, <a href="/search/cs?searchtype=author&amp;query=Peebles%2C+J">John Peebles</a>, <a href="/search/cs?searchtype=author&amp;query=Ye%2C+K">Ke Ye</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+M">Mark Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Du%2C+N">Nan Du</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Q">Qibin Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Keunebroek%2C+Q">Quentin Keunebroek</a> , et al. (130 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.21075v1-abstract-short" style="display: inline;"> We present foundation language models developed to power Apple Intelligence features, including a ~3 billion parameter model designed to run efficiently on devices and a large server-based language model designed for Private Cloud Compute. These models are designed to perform a wide range of tasks efficiently, accurately, and responsibly. This report describes the model architecture, the data used&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.21075v1-abstract-full').style.display = 'inline'; document.getElementById('2407.21075v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.21075v1-abstract-full" style="display: none;"> We present foundation language models developed to power Apple Intelligence features, including a ~3 billion parameter model designed to run efficiently on devices and a large server-based language model designed for Private Cloud Compute. These models are designed to perform a wide range of tasks efficiently, accurately, and responsibly. This report describes the model architecture, the data used to train the model, the training process, how the models are optimized for inference, and the evaluation results. We highlight our focus on Responsible AI and how the principles are applied throughout the model development. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.21075v1-abstract-full').style.display = 'none'; document.getElementById('2407.21075v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.14057">arXiv:2407.14057</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.14057">pdf</a>, <a href="https://arxiv.org/format/2407.14057">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> LazyLLM: Dynamic Token Pruning for Efficient Long Context LLM Inference </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Fu%2C+Q">Qichen Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Cho%2C+M">Minsik Cho</a>, <a href="/search/cs?searchtype=author&amp;query=Merth%2C+T">Thomas Merth</a>, <a href="/search/cs?searchtype=author&amp;query=Mehta%2C+S">Sachin Mehta</a>, <a href="/search/cs?searchtype=author&amp;query=Rastegari%2C+M">Mohammad Rastegari</a>, <a href="/search/cs?searchtype=author&amp;query=Najibi%2C+M">Mahyar Najibi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.14057v1-abstract-short" style="display: inline;"> The inference of transformer-based large language models consists of two sequential stages: 1) a prefilling stage to compute the KV cache of prompts and generate the first token, and 2) a decoding stage to generate subsequent tokens. For long prompts, the KV cache must be computed for all tokens during the prefilling stage, which can significantly increase the time needed to generate the first tok&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.14057v1-abstract-full').style.display = 'inline'; document.getElementById('2407.14057v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.14057v1-abstract-full" style="display: none;"> The inference of transformer-based large language models consists of two sequential stages: 1) a prefilling stage to compute the KV cache of prompts and generate the first token, and 2) a decoding stage to generate subsequent tokens. For long prompts, the KV cache must be computed for all tokens during the prefilling stage, which can significantly increase the time needed to generate the first token. Consequently, the prefilling stage may become a bottleneck in the generation process. An open question remains whether all prompt tokens are essential for generating the first token. To answer this, we introduce a novel method, LazyLLM, that selectively computes the KV for tokens important for the next token prediction in both the prefilling and decoding stages. Contrary to static pruning approaches that prune the prompt at once, LazyLLM allows language models to dynamically select different subsets of tokens from the context in different generation steps, even though they might be pruned in previous steps. Extensive experiments on standard datasets across various tasks demonstrate that LazyLLM is a generic method that can be seamlessly integrated with existing language models to significantly accelerate the generation without fine-tuning. For instance, in the multi-document question-answering task, LazyLLM accelerates the prefilling stage of the LLama 2 7B model by 2.34x while maintaining accuracy. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.14057v1-abstract-full').style.display = 'none'; document.getElementById('2407.14057v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.11033">arXiv:2407.11033</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.11033">pdf</a>, <a href="https://arxiv.org/format/2407.11033">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Hadamard Adapter: An Extreme Parameter-Efficient Adapter Tuning Method for Pre-trained Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Y">Yuyan Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+Q">Qiang Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Fan%2C+G">Ge Fan</a>, <a href="/search/cs?searchtype=author&amp;query=Du%2C+L">Lun Du</a>, <a href="/search/cs?searchtype=author&amp;query=Lou%2C+J">Jian-Guang Lou</a>, <a href="/search/cs?searchtype=author&amp;query=Han%2C+S">Shi Han</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+D">Dongmei Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Z">Zhixu Li</a>, <a href="/search/cs?searchtype=author&amp;query=Xiao%2C+Y">Yanghua Xiao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.11033v1-abstract-short" style="display: inline;"> Recent years, Pre-trained Language models (PLMs) have swept into various fields of artificial intelligence and achieved great success. However, most PLMs, such as T5 and GPT3, have a huge amount of parameters, fine-tuning them is often expensive and time consuming, and storing them takes up a lot of space. Therefore, it is necessary to adopt a parameter-efficient approach to reduce parameters of P&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.11033v1-abstract-full').style.display = 'inline'; document.getElementById('2407.11033v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.11033v1-abstract-full" style="display: none;"> Recent years, Pre-trained Language models (PLMs) have swept into various fields of artificial intelligence and achieved great success. However, most PLMs, such as T5 and GPT3, have a huge amount of parameters, fine-tuning them is often expensive and time consuming, and storing them takes up a lot of space. Therefore, it is necessary to adopt a parameter-efficient approach to reduce parameters of PLMs in fine-tuning without compromising their performance in downstream tasks. In this paper, we design a novel adapter which only acts on self-attention outputs in PLMs. This adapter adopts element-wise linear transformation using Hadamard product, hence named as Hadamard adapter, requires the fewest parameters compared to previous parameter-efficient adapters. In addition, we also summarize some tuning patterns for Hadamard adapter shared by various downstream tasks, expecting to provide some guidance for further parameter reduction with shared adapters in future studies. The experiments conducted on the widely-used GLUE benchmark with several SOTA PLMs prove that the Hadamard adapter achieves competitive performance with only 0.033\% parameters compared with full fine-tuning, and it has the fewest parameters compared with other adapters. Moreover, we further find that there is also some redundant layers in the Hadamard adapter which can be removed to achieve more parameter efficiency with only 0.022\% parameters. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.11033v1-abstract-full').style.display = 'none'; document.getElementById('2407.11033v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to CIKM 2023 (Long Paper)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.07052">arXiv:2407.07052</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.07052">pdf</a>, <a href="https://arxiv.org/format/2407.07052">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Latent Space Imaging </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Souza%2C+M">Matheus Souza</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+Y">Yidan Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+K">Kaizhang Kang</a>, <a href="/search/cs?searchtype=author&amp;query=Mishra%2C+Y+N">Yogeshwar Nath Mishra</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+Q">Qiang Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Heidrich%2C+W">Wolfgang Heidrich</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.07052v1-abstract-short" style="display: inline;"> Digital imaging systems have classically been based on brute-force measuring and processing of pixels organized on regular grids. The human visual system, on the other hand, performs a massive data reduction from the number of photo-receptors to the optic nerve, essentially encoding the image information into a low bandwidth latent space representation suitable for processing by the human brain. I&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.07052v1-abstract-full').style.display = 'inline'; document.getElementById('2407.07052v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.07052v1-abstract-full" style="display: none;"> Digital imaging systems have classically been based on brute-force measuring and processing of pixels organized on regular grids. The human visual system, on the other hand, performs a massive data reduction from the number of photo-receptors to the optic nerve, essentially encoding the image information into a low bandwidth latent space representation suitable for processing by the human brain. In this work, we propose to follow a similar approach for the development of artificial vision systems. Latent Space Imaging is a new paradigm that, through a combination of optics and software, directly encodes the image information into the semantically rich latent space of a generative model, thus substantially reducing bandwidth and memory requirements during the capture process. We demonstrate this new principle through an initial hardware prototype based on the single pixel camera. By designing an amplitude modulation scheme that encodes into the latent space of a generative model, we achieve compression ratios from 1:100 to 1:1,000 during the imaging process, illustrating the potential of latent space imaging for highly efficient imaging hardware, to enable future applications in high speed imaging, or task-specific cameras with substantially reduced hardware complexity. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.07052v1-abstract-full').style.display = 'none'; document.getElementById('2407.07052v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.04121">arXiv:2407.04121</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.04121">pdf</a>, <a href="https://arxiv.org/format/2407.04121">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Hallucination Detection: Robustly Discerning Reliable Answers in Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Y">Yuyan Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+Q">Qiang Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Yuan%2C+Y">Yichen Yuan</a>, <a href="/search/cs?searchtype=author&amp;query=Wen%2C+Z">Zhihao Wen</a>, <a href="/search/cs?searchtype=author&amp;query=Fan%2C+G">Ge Fan</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+D">Dayiheng Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+D">Dongmei Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Z">Zhixu Li</a>, <a href="/search/cs?searchtype=author&amp;query=Xiao%2C+Y">Yanghua Xiao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.04121v1-abstract-short" style="display: inline;"> Large Language Models (LLMs) have gained widespread adoption in various natural language processing tasks, including question answering and dialogue systems. However, a major drawback of LLMs is the issue of hallucination, where they generate unfaithful or inconsistent content that deviates from the input source, leading to severe consequences. In this paper, we propose a robust discriminator name&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.04121v1-abstract-full').style.display = 'inline'; document.getElementById('2407.04121v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.04121v1-abstract-full" style="display: none;"> Large Language Models (LLMs) have gained widespread adoption in various natural language processing tasks, including question answering and dialogue systems. However, a major drawback of LLMs is the issue of hallucination, where they generate unfaithful or inconsistent content that deviates from the input source, leading to severe consequences. In this paper, we propose a robust discriminator named RelD to effectively detect hallucination in LLMs&#39; generated answers. RelD is trained on the constructed RelQA, a bilingual question-answering dialogue dataset along with answers generated by LLMs and a comprehensive set of metrics. Our experimental results demonstrate that the proposed RelD successfully detects hallucination in the answers generated by diverse LLMs. Moreover, it performs well in distinguishing hallucination in LLMs&#39; generated answers from both in-distribution and out-of-distribution datasets. Additionally, we also conduct a thorough analysis of the types of hallucinations that occur and present valuable insights. This research significantly contributes to the detection of reliable answers generated by LLMs and holds noteworthy implications for mitigating hallucination in the future work. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.04121v1-abstract-full').style.display = 'none'; document.getElementById('2407.04121v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to CIKM 2023 (Long Paper)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.10537">arXiv:2406.10537</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.10537">pdf</a>, <a href="https://arxiv.org/format/2406.10537">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Scalable Differentiable Causal Discovery in the Presence of Latent Confounders with Skeleton Posterior (Extended Version) </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Ma%2C+P">Pingchuan Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Ding%2C+R">Rui Ding</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+Q">Qiang Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+J">Jiaru Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+S">Shuai Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Han%2C+S">Shi Han</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+D">Dongmei Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.10537v1-abstract-short" style="display: inline;"> Differentiable causal discovery has made significant advancements in the learning of directed acyclic graphs. However, its application to real-world datasets remains restricted due to the ubiquity of latent confounders and the requirement to learn maximal ancestral graphs (MAGs). To date, existing differentiable MAG learning algorithms have been limited to small datasets and failed to scale to lar&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.10537v1-abstract-full').style.display = 'inline'; document.getElementById('2406.10537v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.10537v1-abstract-full" style="display: none;"> Differentiable causal discovery has made significant advancements in the learning of directed acyclic graphs. However, its application to real-world datasets remains restricted due to the ubiquity of latent confounders and the requirement to learn maximal ancestral graphs (MAGs). To date, existing differentiable MAG learning algorithms have been limited to small datasets and failed to scale to larger ones (e.g., with more than 50 variables). The key insight in this paper is that the causal skeleton, which is the undirected version of the causal graph, has potential for improving accuracy and reducing the search space of the optimization procedure, thereby enhancing the performance of differentiable causal discovery. Therefore, we seek to address a two-fold challenge to harness the potential of the causal skeleton for differentiable causal discovery in the presence of latent confounders: (1) scalable and accurate estimation of skeleton and (2) universal integration of skeleton estimation with differentiable causal discovery. To this end, we propose SPOT (Skeleton Posterior-guided OpTimization), a two-phase framework that harnesses skeleton posterior for differentiable causal discovery in the presence of latent confounders. On the contrary to a ``point-estimation&#39;&#39;, SPOT seeks to estimate the posterior distribution of skeletons given the dataset. It first formulates the posterior inference as an instance of amortized inference problem and concretizes it with a supervised causal learning (SCL)-enabled solution to estimate the skeleton posterior. To incorporate the skeleton posterior with differentiable causal discovery, SPOT then features a skeleton posterior-guided stochastic optimization procedure to guide the optimization of MAGs. [abridged due to length limit] <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.10537v1-abstract-full').style.display = 'none'; document.getElementById('2406.10537v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.00834">arXiv:2406.00834</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.00834">pdf</a>, <a href="https://arxiv.org/format/2406.00834">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optics">physics.optics</span> </div> </div> <p class="title is-5 mathjax"> End-to-End Hybrid Refractive-Diffractive Lens Design with Differentiable Ray-Wave Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yang%2C+X">Xinge Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Souza%2C+M">Matheus Souza</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+K">Kunyi Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Chakravarthula%2C+P">Praneeth Chakravarthula</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+Q">Qiang Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Heidrich%2C+W">Wolfgang Heidrich</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.00834v1-abstract-short" style="display: inline;"> Hybrid refractive-diffractive lenses combine the light efficiency of refractive lenses with the information encoding power of diffractive optical elements (DOE), showing great potential as the next generation of imaging systems. However, accurately simulating such hybrid designs is generally difficult, and in particular, there are no existing differentiable image formation models for hybrid lenses&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.00834v1-abstract-full').style.display = 'inline'; document.getElementById('2406.00834v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.00834v1-abstract-full" style="display: none;"> Hybrid refractive-diffractive lenses combine the light efficiency of refractive lenses with the information encoding power of diffractive optical elements (DOE), showing great potential as the next generation of imaging systems. However, accurately simulating such hybrid designs is generally difficult, and in particular, there are no existing differentiable image formation models for hybrid lenses with sufficient accuracy. In this work, we propose a new hybrid ray-tracing and wave-propagation (ray-wave) model for accurate simulation of both optical aberrations and diffractive phase modulation, where the DOE is placed between the last refractive surface and the image sensor, i.e. away from the Fourier plane that is often used as a DOE position. The proposed ray-wave model is fully differentiable, enabling gradient back-propagation for end-to-end co-design of refractive-diffractive lens optimization and the image reconstruction network. We validate the accuracy of the proposed model by comparing the simulated point spread functions (PSFs) with theoretical results, as well as simulation experiments that show our model to be more accurate than solutions implemented in commercial software packages like Zemax. We demonstrate the effectiveness of the proposed model through real-world experiments and show significant improvements in both aberration correction and extended depth-of-field (EDoF) imaging. We believe the proposed model will motivate further investigation into a wide range of applications in computational imaging, computational photography, and advanced optical design. Code will be released upon publication. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.00834v1-abstract-full').style.display = 'none'; document.getElementById('2406.00834v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.19846">arXiv:2405.19846</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.19846">pdf</a>, <a href="https://arxiv.org/format/2405.19846">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Quest: Query-centric Data Synthesis Approach for Long-context Scaling of Large Language Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Gao%2C+C">Chaochen Gao</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+X">Xing Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+Q">Qi Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Hu%2C+S">Songlin Hu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.19846v5-abstract-short" style="display: inline;"> Recent advancements in large language models (LLMs) have highlighted the importance of extending context lengths for handling complex tasks. While traditional methods for training on long contexts often use filtered long documents, these approaches lead to domain imbalances, limiting model performance. To address this, techniques like random document concatenation (Standard) and similarity-based m&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.19846v5-abstract-full').style.display = 'inline'; document.getElementById('2405.19846v5-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.19846v5-abstract-full" style="display: none;"> Recent advancements in large language models (LLMs) have highlighted the importance of extending context lengths for handling complex tasks. While traditional methods for training on long contexts often use filtered long documents, these approaches lead to domain imbalances, limiting model performance. To address this, techniques like random document concatenation (Standard) and similarity-based methods (KNN, ICLM) have been developed. However, they either sacrifice semantic coherence or diversity. To balance both aspects, we introduce Quest, a query-centric data synthesis method aggregating semantically relevant yet diverse documents. Quest uses a generative model to predict potential queries for each document, grouping documents with similar queries and keywords. Extensive experiments demonstrate Quest&#39;s superior performance on long-context tasks, achieving remarkable results with context lengths of up to 1M tokens and confirming its scalability across various model sizes. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.19846v5-abstract-full').style.display = 'none'; document.getElementById('2405.19846v5-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 30 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.08638">arXiv:2405.08638</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.08638">pdf</a>, <a href="https://arxiv.org/format/2405.08638">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> vMFER: Von Mises-Fisher Experience Resampling Based on Uncertainty of Gradient Directions for Policy Improvement </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+Y">Yiwen Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+J">Jinyi Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Wei%2C+W">Wenya Wei</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+Q">Qianyi Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Hu%2C+Y">Yujing Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Fang%2C+Z">Zhou Fang</a>, <a href="/search/cs?searchtype=author&amp;query=An%2C+B">Bo An</a>, <a href="/search/cs?searchtype=author&amp;query=Hao%2C+J">Jianye Hao</a>, <a href="/search/cs?searchtype=author&amp;query=Lv%2C+T">Tangjie Lv</a>, <a href="/search/cs?searchtype=author&amp;query=Fan%2C+C">Changjie Fan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.08638v1-abstract-short" style="display: inline;"> Reinforcement Learning (RL) is a widely employed technique in decision-making problems, encompassing two fundamental operations -- policy evaluation and policy improvement. Enhancing learning efficiency remains a key challenge in RL, with many efforts focused on using ensemble critics to boost policy evaluation efficiency. However, when using multiple critics, the actor in the policy improvement p&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.08638v1-abstract-full').style.display = 'inline'; document.getElementById('2405.08638v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.08638v1-abstract-full" style="display: none;"> Reinforcement Learning (RL) is a widely employed technique in decision-making problems, encompassing two fundamental operations -- policy evaluation and policy improvement. Enhancing learning efficiency remains a key challenge in RL, with many efforts focused on using ensemble critics to boost policy evaluation efficiency. However, when using multiple critics, the actor in the policy improvement process can obtain different gradients. Previous studies have combined these gradients without considering their disagreements. Therefore, optimizing the policy improvement process is crucial to enhance learning efficiency. This study focuses on investigating the impact of gradient disagreements caused by ensemble critics on policy improvement. We introduce the concept of uncertainty of gradient directions as a means to measure the disagreement among gradients utilized in the policy improvement process. Through measuring the disagreement among gradients, we find that transitions with lower uncertainty of gradient directions are more reliable in the policy improvement process. Building on this analysis, we propose a method called von Mises-Fisher Experience Resampling (vMFER), which optimizes the policy improvement process by resampling transitions and assigning higher confidence to transitions with lower uncertainty of gradient directions. Our experiments demonstrate that vMFER significantly outperforms the benchmark and is particularly well-suited for ensemble structures in RL. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.08638v1-abstract-full').style.display = 'none'; document.getElementById('2405.08638v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by IJCAI 2024, with appendix</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.13891">arXiv:2404.13891</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2404.13891">pdf</a>, <a href="https://arxiv.org/format/2404.13891">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> </div> </div> <p class="title is-5 mathjax"> Minimizing Weighted Counterfactual Regret with Optimistic Online Mirror Descent </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Xu%2C+H">Hang Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+K">Kai Li</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+B">Bingyun Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+H">Haobo Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+Q">Qiang Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Xing%2C+J">Junliang Xing</a>, <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+J">Jian Cheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.13891v2-abstract-short" style="display: inline;"> Counterfactual regret minimization (CFR) is a family of algorithms for effectively solving imperfect-information games. It decomposes the total regret into counterfactual regrets, utilizing local regret minimization algorithms, such as Regret Matching (RM) or RM+, to minimize them. Recent research establishes a connection between Online Mirror Descent (OMD) and RM+, paving the way for an optimisti&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.13891v2-abstract-full').style.display = 'inline'; document.getElementById('2404.13891v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.13891v2-abstract-full" style="display: none;"> Counterfactual regret minimization (CFR) is a family of algorithms for effectively solving imperfect-information games. It decomposes the total regret into counterfactual regrets, utilizing local regret minimization algorithms, such as Regret Matching (RM) or RM+, to minimize them. Recent research establishes a connection between Online Mirror Descent (OMD) and RM+, paving the way for an optimistic variant PRM+ and its extension PCFR+. However, PCFR+ assigns uniform weights for each iteration when determining regrets, leading to substantial regrets when facing dominated actions. This work explores minimizing weighted counterfactual regret with optimistic OMD, resulting in a novel CFR variant PDCFR+. It integrates PCFR+ and Discounted CFR (DCFR) in a principled manner, swiftly mitigating negative effects of dominated actions and consistently leveraging predictions to accelerate convergence. Theoretical analyses prove that PDCFR+ converges to a Nash equilibrium, particularly under distinct weighting schemes for regrets and average strategies. Experimental results demonstrate PDCFR+&#39;s fast convergence in common imperfect-information games. The code is available at https://github.com/rpSebastian/PDCFRPlus. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.13891v2-abstract-full').style.display = 'none'; document.getElementById('2404.13891v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to 33rd International Joint Conference on Artificial Intelligence (IJCAI 2024)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.06910">arXiv:2404.06910</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2404.06910">pdf</a>, <a href="https://arxiv.org/format/2404.06910">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Superposition Prompting: Improving and Accelerating Retrieval-Augmented Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Merth%2C+T">Thomas Merth</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+Q">Qichen Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Rastegari%2C+M">Mohammad Rastegari</a>, <a href="/search/cs?searchtype=author&amp;query=Najibi%2C+M">Mahyar Najibi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.06910v2-abstract-short" style="display: inline;"> Despite the successes of large language models (LLMs), they exhibit significant drawbacks, particularly when processing long contexts. Their inference cost scales quadratically with respect to sequence length, making it expensive for deployment in some real-world text processing applications, such as retrieval-augmented generation (RAG). Additionally, LLMs also exhibit the &#34;distraction phenomenon&#34;&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.06910v2-abstract-full').style.display = 'inline'; document.getElementById('2404.06910v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.06910v2-abstract-full" style="display: none;"> Despite the successes of large language models (LLMs), they exhibit significant drawbacks, particularly when processing long contexts. Their inference cost scales quadratically with respect to sequence length, making it expensive for deployment in some real-world text processing applications, such as retrieval-augmented generation (RAG). Additionally, LLMs also exhibit the &#34;distraction phenomenon&#34;, where irrelevant context in the prompt degrades output quality. To address these drawbacks, we propose a novel RAG prompting methodology, *superposition prompting*, which can be directly applied to pre-trained transformer-based LLMs *without the need for fine-tuning*. At a high level, superposition prompting allows the LLM to process input documents in parallel *prompt paths*, discarding paths once they are deemed irrelevant. We demonstrate the capability of our method to simultaneously enhance time efficiency across a variety of question-answering benchmarks using multiple pre-trained LLMs. Furthermore, our technique significantly improves accuracy when the retrieved context is large relative the context the model was trained on. For example, our approach facilitates a 93x reduction in compute time while *improving* accuracy by 43% on the NaturalQuestions-Open dataset with the MPT-7B instruction-tuned model over naive RAG. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.06910v2-abstract-full').style.display = 'none'; document.getElementById('2404.06910v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 10 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.18057">arXiv:2403.18057</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.18057">pdf</a>, <a href="https://arxiv.org/format/2403.18057">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Prioritized League Reinforcement Learning for Large-Scale Heterogeneous Multiagent Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Fu%2C+Q">Qingxu Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Pu%2C+Z">Zhiqiang Pu</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+M">Min Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Qiu%2C+T">Tenghai Qiu</a>, <a href="/search/cs?searchtype=author&amp;query=Yi%2C+J">Jianqiang Yi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.18057v1-abstract-short" style="display: inline;"> Large-scale heterogeneous multiagent systems feature various realistic factors in the real world, such as agents with diverse abilities and overall system cost. In comparison to homogeneous systems, heterogeneous systems offer significant practical advantages. Nonetheless, they also present challenges for multiagent reinforcement learning, including addressing the non-stationary problem and managi&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.18057v1-abstract-full').style.display = 'inline'; document.getElementById('2403.18057v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.18057v1-abstract-full" style="display: none;"> Large-scale heterogeneous multiagent systems feature various realistic factors in the real world, such as agents with diverse abilities and overall system cost. In comparison to homogeneous systems, heterogeneous systems offer significant practical advantages. Nonetheless, they also present challenges for multiagent reinforcement learning, including addressing the non-stationary problem and managing an imbalanced number of agents with different types. We propose a Prioritized Heterogeneous League Reinforcement Learning (PHLRL) method to address large-scale heterogeneous cooperation problems. PHLRL maintains a record of various policies that agents have explored during their training and establishes a heterogeneous league consisting of diverse policies to aid in future policy optimization. Furthermore, we design a prioritized policy gradient approach to compensate for the gap caused by differences in the number of different types of agents. Next, we use Unreal Engine to design a large-scale heterogeneous cooperation benchmark named Large-Scale Multiagent Operation (LSMO), which is a complex two-team competition scenario that requires collaboration from both ground and airborne agents. We use experiments to show that PHLRL outperforms state-of-the-art methods, including QTRAN and QPLEX in LSMO. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.18057v1-abstract-full').style.display = 'none'; document.getElementById('2403.18057v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.18056">arXiv:2403.18056</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.18056">pdf</a>, <a href="https://arxiv.org/format/2403.18056">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Self-Clustering Hierarchical Multi-Agent Reinforcement Learning with Extensible Cooperation Graph </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Fu%2C+Q">Qingxu Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Qiu%2C+T">Tenghai Qiu</a>, <a href="/search/cs?searchtype=author&amp;query=Yi%2C+J">Jianqiang Yi</a>, <a href="/search/cs?searchtype=author&amp;query=Pu%2C+Z">Zhiqiang Pu</a>, <a href="/search/cs?searchtype=author&amp;query=Ai%2C+X">Xiaolin Ai</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.18056v1-abstract-short" style="display: inline;"> Multi-Agent Reinforcement Learning (MARL) has been successful in solving many cooperative challenges. However, classic non-hierarchical MARL algorithms still cannot address various complex multi-agent problems that require hierarchical cooperative behaviors. The cooperative knowledge and policies learned in non-hierarchical algorithms are implicit and not interpretable, thereby restricting the int&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.18056v1-abstract-full').style.display = 'inline'; document.getElementById('2403.18056v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.18056v1-abstract-full" style="display: none;"> Multi-Agent Reinforcement Learning (MARL) has been successful in solving many cooperative challenges. However, classic non-hierarchical MARL algorithms still cannot address various complex multi-agent problems that require hierarchical cooperative behaviors. The cooperative knowledge and policies learned in non-hierarchical algorithms are implicit and not interpretable, thereby restricting the integration of existing knowledge. This paper proposes a novel hierarchical MARL model called Hierarchical Cooperation Graph Learning (HCGL) for solving general multi-agent problems. HCGL has three components: a dynamic Extensible Cooperation Graph (ECG) for achieving self-clustering cooperation; a group of graph operators for adjusting the topology of ECG; and an MARL optimizer for training these graph operators. HCGL&#39;s key distinction from other MARL models is that the behaviors of agents are guided by the topology of ECG instead of policy neural networks. ECG is a three-layer graph consisting of an agent node layer, a cluster node layer, and a target node layer. To manipulate the ECG topology in response to changing environmental conditions, four graph operators are trained to adjust the edge connections of ECG dynamically. The hierarchical feature of ECG provides a unique approach to merge primitive actions (actions executed by the agents) and cooperative actions (actions executed by the clusters) into a unified action space, allowing us to integrate fundamental cooperative knowledge into an extensible interface. In our experiments, the HCGL model has shown outstanding performance in multi-agent benchmarks with sparse rewards. We also verify that HCGL can easily be transferred to large-scale scenarios with high zero-shot transfer success rates. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.18056v1-abstract-full').style.display = 'none'; document.getElementById('2403.18056v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.03172">arXiv:2403.03172</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.03172">pdf</a>, <a href="https://arxiv.org/format/2403.03172">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Reaching Consensus in Cooperative Multi-Agent Reinforcement Learning with Goal Imagination </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wang%2C+L">Liangzhou Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+K">Kaiwen Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+F">Fengming Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Yao%2C+X">Xinghu Yao</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+S">Shujie Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Ye%2C+D">Deheng Ye</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+H">Haobo Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+Q">Qiang Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+W">Wei Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.03172v1-abstract-short" style="display: inline;"> Reaching consensus is key to multi-agent coordination. To accomplish a cooperative task, agents need to coherently select optimal joint actions to maximize the team reward. However, current cooperative multi-agent reinforcement learning (MARL) methods usually do not explicitly take consensus into consideration, which may cause miscoordination problem. In this paper, we propose a model-based consen&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.03172v1-abstract-full').style.display = 'inline'; document.getElementById('2403.03172v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.03172v1-abstract-full" style="display: none;"> Reaching consensus is key to multi-agent coordination. To accomplish a cooperative task, agents need to coherently select optimal joint actions to maximize the team reward. However, current cooperative multi-agent reinforcement learning (MARL) methods usually do not explicitly take consensus into consideration, which may cause miscoordination problem. In this paper, we propose a model-based consensus mechanism to explicitly coordinate multiple agents. The proposed Multi-agent Goal Imagination (MAGI) framework guides agents to reach consensus with an Imagined common goal. The common goal is an achievable state with high value, which is obtained by sampling from the distribution of future states. We directly model this distribution with a self-supervised generative model, thus alleviating the &#34;curse of dimensinality&#34; problem induced by multi-agent multi-step policy rollout commonly used in model-based methods. We show that such efficient consensus mechanism can guide all agents cooperatively reaching valuable future states. Results on Multi-agent Particle-Environments and Google Research Football environment demonstrate the superiority of MAGI in both sample efficiency and performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.03172v1-abstract-full').style.display = 'none'; document.getElementById('2403.03172v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.01700">arXiv:2403.01700</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.01700">pdf</a>, <a href="https://arxiv.org/format/2403.01700">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Robust Wake Word Spotting With Frame-Level Cross-Modal Attention Based Audio-Visual Conformer </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wang%2C+H">Haoxu Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+M">Ming Cheng</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+Q">Qiang Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+M">Ming Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.01700v1-abstract-short" style="display: inline;"> In recent years, neural network-based Wake Word Spotting achieves good performance on clean audio samples but struggles in noisy environments. Audio-Visual Wake Word Spotting (AVWWS) receives lots of attention because visual lip movement information is not affected by complex acoustic scenes. Previous works usually use simple addition or concatenation for multi-modal fusion. The inter-modal correl&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.01700v1-abstract-full').style.display = 'inline'; document.getElementById('2403.01700v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.01700v1-abstract-full" style="display: none;"> In recent years, neural network-based Wake Word Spotting achieves good performance on clean audio samples but struggles in noisy environments. Audio-Visual Wake Word Spotting (AVWWS) receives lots of attention because visual lip movement information is not affected by complex acoustic scenes. Previous works usually use simple addition or concatenation for multi-modal fusion. The inter-modal correlation remains relatively under-explored. In this paper, we propose a novel module called Frame-Level Cross-Modal Attention (FLCMA) to improve the performance of AVWWS systems. This module can help model multi-modal information at the frame-level through synchronous lip movements and speech signals. We train the end-to-end FLCMA based Audio-Visual Conformer and further improve the performance by fine-tuning pre-trained uni-modal models for the AVWWS task. The proposed system achieves a new state-of-the-art result (4.57% WWS score) on the far-field MISP dataset. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.01700v1-abstract-full').style.display = 'none'; document.getElementById('2403.01700v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ICASSP 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.11131">arXiv:2402.11131</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2402.11131">pdf</a>, <a href="https://arxiv.org/format/2402.11131">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Speculative Streaming: Fast LLM Inference without Auxiliary Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Bhendawade%2C+N">Nikhil Bhendawade</a>, <a href="/search/cs?searchtype=author&amp;query=Belousova%2C+I">Irina Belousova</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+Q">Qichen Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Mason%2C+H">Henry Mason</a>, <a href="/search/cs?searchtype=author&amp;query=Rastegari%2C+M">Mohammad Rastegari</a>, <a href="/search/cs?searchtype=author&amp;query=Najibi%2C+M">Mahyar Najibi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.11131v1-abstract-short" style="display: inline;"> Speculative decoding is a prominent technique to speed up the inference of a large target language model based on predictions of an auxiliary draft model. While effective, in application-specific settings, it often involves fine-tuning both draft and target models to achieve high acceptance rates. As the number of downstream tasks grows, these draft models add significant complexity to inference s&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.11131v1-abstract-full').style.display = 'inline'; document.getElementById('2402.11131v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.11131v1-abstract-full" style="display: none;"> Speculative decoding is a prominent technique to speed up the inference of a large target language model based on predictions of an auxiliary draft model. While effective, in application-specific settings, it often involves fine-tuning both draft and target models to achieve high acceptance rates. As the number of downstream tasks grows, these draft models add significant complexity to inference systems. We propose Speculative Streaming, a single-model speculative decoding method that fuses drafting into the target model by changing the fine-tuning objective from next token prediction to future n-gram prediction. Speculative Streaming speeds up decoding by 1.8 - 3.1X in a diverse set of tasks, such as Summarization, Structured Queries, and Meaning Representation, without sacrificing generation quality. Additionally, Speculative Streaming is parameter-efficient. It achieves on-par/higher speed-ups than Medusa-style architectures while using ~10000X fewer extra parameters, making it well-suited for resource-constrained devices. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.11131v1-abstract-full').style.display = 'none'; document.getElementById('2402.11131v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.05359">arXiv:2402.05359</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2402.05359">pdf</a>, <a href="https://arxiv.org/format/2402.05359">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> An Examination on the Effectiveness of Divide-and-Conquer Prompting in Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Y">Yizhou Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Du%2C+L">Lun Du</a>, <a href="/search/cs?searchtype=author&amp;query=Cao%2C+D">Defu Cao</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+Q">Qiang Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Y">Yan Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.05359v6-abstract-short" style="display: inline;"> Foundation models, such as Large language Models (LLMs), have attracted significant amount of interest due to their large number of applications. However, when handling tasks involving repetitive sub-tasks and/or deceptive contents, such as arithmetic calculation and article-level fake news detection, simple instructional prompts suffer from inaccurate responses. Existing works show that more comp&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.05359v6-abstract-full').style.display = 'inline'; document.getElementById('2402.05359v6-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.05359v6-abstract-full" style="display: none;"> Foundation models, such as Large language Models (LLMs), have attracted significant amount of interest due to their large number of applications. However, when handling tasks involving repetitive sub-tasks and/or deceptive contents, such as arithmetic calculation and article-level fake news detection, simple instructional prompts suffer from inaccurate responses. Existing works show that more complicated prompting strategies, such as Chain-of-Thoughts and Least-to-Most, can unlock LLM&#39;s powerful capacity in diverse areas. Recent researches reveal that simple divide-and-conquer prompting strategy, i.e. simply dividing the input sequence to multiple sub-inputs, can also substantially improve LLM&#39;s performance in some specific tasks such as misinformation detection. In this paper, we aim at examining the utility of divide-and-conquer prompting strategy and answer on which kind of tasks this strategy gets advantages. Specifically, we provide a theoretic analysis to divide-and-conquer prompting strategy and help us identify the specific tasks where DaC prompting can bring performance boost with theoretic guarantee. We then present two cases (large integer arithmetic and fact verification) where experimental results aligns with our theoretic analysis. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.05359v6-abstract-full').style.display = 'none'; document.getElementById('2402.05359v6-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Preprint</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.05120">arXiv:2402.05120</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2402.05120">pdf</a>, <a href="https://arxiv.org/format/2402.05120">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> More Agents Is All You Need </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Junyou Li</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Q">Qin Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Yu%2C+Y">Yangbin Yu</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+Q">Qiang Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Ye%2C+D">Deheng Ye</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.05120v2-abstract-short" style="display: inline;"> We find that, simply via a sampling-and-voting method, the performance of large language models (LLMs) scales with the number of agents instantiated. Also, this method, termed as Agent Forest, is orthogonal to existing complicated methods to further enhance LLMs, while the degree of enhancement is correlated to the task difficulty. We conduct comprehensive experiments on a wide range of LLM benchm&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.05120v2-abstract-full').style.display = 'inline'; document.getElementById('2402.05120v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.05120v2-abstract-full" style="display: none;"> We find that, simply via a sampling-and-voting method, the performance of large language models (LLMs) scales with the number of agents instantiated. Also, this method, termed as Agent Forest, is orthogonal to existing complicated methods to further enhance LLMs, while the degree of enhancement is correlated to the task difficulty. We conduct comprehensive experiments on a wide range of LLM benchmarks to verify the presence of our finding, and to study the properties that can facilitate its occurrence. Our code is publicly available at: https://github.com/MoreAgentsIsAllYouNeed/AgentForest <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.05120v2-abstract-full').style.display = 'none'; document.getElementById('2402.05120v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published at Transactions on Machine Learning Research (TMLR)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.02330">arXiv:2402.02330</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2402.02330">pdf</a>, <a href="https://arxiv.org/format/2402.02330">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Enhance Reasoning for Large Language Models in the Game Werewolf </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wu%2C+S">Shuang Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+L">Liwen Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+T">Tao Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+S">Shiwei Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+Q">Qiang Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Wei%2C+Y">Yang Wei</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+H">Haobo Fu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.02330v2-abstract-short" style="display: inline;"> This paper presents an innovative framework that integrates Large Language Models (LLMs) with an external Thinker module to enhance the reasoning capabilities of LLM-based agents. Unlike augmenting LLMs with prompt engineering, Thinker directly harnesses knowledge from databases and employs various optimization techniques. The framework forms a reasoning hierarchy where LLMs handle intuitive Syste&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.02330v2-abstract-full').style.display = 'inline'; document.getElementById('2402.02330v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.02330v2-abstract-full" style="display: none;"> This paper presents an innovative framework that integrates Large Language Models (LLMs) with an external Thinker module to enhance the reasoning capabilities of LLM-based agents. Unlike augmenting LLMs with prompt engineering, Thinker directly harnesses knowledge from databases and employs various optimization techniques. The framework forms a reasoning hierarchy where LLMs handle intuitive System-1 tasks such as natural language processing, while the Thinker focuses on cognitive System-2 tasks that require complex logical analysis and domain-specific knowledge. Our framework is presented using a 9-player Werewolf game that demands dual-system reasoning. We introduce a communication protocol between LLMs and the Thinker, and train the Thinker using data from 18800 human sessions and reinforcement learning. Experiments demonstrate the framework&#39;s effectiveness in deductive reasoning, speech generation, and online game evaluation. Additionally, we fine-tune a 6B LLM to surpass GPT4 when integrated with the Thinker. This paper also contributes the largest dataset for social deduction games to date. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.02330v2-abstract-full').style.display = 'none'; document.getElementById('2402.02330v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.02053">arXiv:2402.02053</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2402.02053">pdf</a>, <a href="https://arxiv.org/format/2402.02053">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Affordable Generative Agents </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yu%2C+Y">Yangbin Yu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Q">Qin Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Junyou Li</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+Q">Qiang Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Ye%2C+D">Deheng Ye</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.02053v2-abstract-short" style="display: inline;"> The emergence of large language models (LLMs) has significantly advanced the simulation of believable interactive agents. However, the substantial cost on maintaining the prolonged agent interactions poses challenge over the deployment of believable LLM-based agents. Therefore, in this paper, we develop Affordable Generative Agents (AGA), a framework for enabling the generation of believable and l&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.02053v2-abstract-full').style.display = 'inline'; document.getElementById('2402.02053v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.02053v2-abstract-full" style="display: none;"> The emergence of large language models (LLMs) has significantly advanced the simulation of believable interactive agents. However, the substantial cost on maintaining the prolonged agent interactions poses challenge over the deployment of believable LLM-based agents. Therefore, in this paper, we develop Affordable Generative Agents (AGA), a framework for enabling the generation of believable and low-cost interactions on both agent-environment and inter-agents levels. Specifically, for agent-environment interactions, we substitute repetitive LLM inferences with learned policies; while for inter-agent interactions, we model the social relationships between agents and compress auxiliary dialogue information. Extensive experiments on multiple environments show the effectiveness and efficiency of our proposed framework. Also, we delve into the mechanisms of emergent believable behaviors lying in LLM agents, demonstrating that agents can only generate finite behaviors in fixed environments, based upon which, we understand ways to facilitate emergent interaction behaviors. Our code is publicly available at: https://github.com/AffordableGenerativeAgents/Affordable-Generative-Agents. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.02053v2-abstract-full').style.display = 'none'; document.getElementById('2402.02053v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.16444">arXiv:2401.16444</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2401.16444">pdf</a>, <a href="https://arxiv.org/format/2401.16444">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Enhancing Human Experience in Human-Agent Collaboration: A Human-Centered Modeling Approach Based on Positive Human Gain </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Gao%2C+Y">Yiming Gao</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+F">Feiyu Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+L">Liang Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Lian%2C+Z">Zhenjie Lian</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+D">Dehua Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+W">Weixuan Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+W">Wenjin Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+S">Siqin Li</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+X">Xianliang Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+W">Wenhui Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Dai%2C+J">Jing Dai</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+Q">Qiang Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+W">Wei Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+L">Lanxiao Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+W">Wei Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.16444v1-abstract-short" style="display: inline;"> Existing game AI research mainly focuses on enhancing agents&#39; abilities to win games, but this does not inherently make humans have a better experience when collaborating with these agents. For example, agents may dominate the collaboration and exhibit unintended or detrimental behaviors, leading to poor experiences for their human partners. In other words, most game AI agents are modeled in a &#34;se&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.16444v1-abstract-full').style.display = 'inline'; document.getElementById('2401.16444v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.16444v1-abstract-full" style="display: none;"> Existing game AI research mainly focuses on enhancing agents&#39; abilities to win games, but this does not inherently make humans have a better experience when collaborating with these agents. For example, agents may dominate the collaboration and exhibit unintended or detrimental behaviors, leading to poor experiences for their human partners. In other words, most game AI agents are modeled in a &#34;self-centered&#34; manner. In this paper, we propose a &#34;human-centered&#34; modeling scheme for collaborative agents that aims to enhance the experience of humans. Specifically, we model the experience of humans as the goals they expect to achieve during the task. We expect that agents should learn to enhance the extent to which humans achieve these goals while maintaining agents&#39; original abilities (e.g., winning games). To achieve this, we propose the Reinforcement Learning from Human Gain (RLHG) approach. The RLHG approach introduces a &#34;baseline&#34;, which corresponds to the extent to which humans primitively achieve their goals, and encourages agents to learn behaviors that can effectively enhance humans in achieving their goals better. We evaluate the RLHG agent in the popular Multi-player Online Battle Arena (MOBA) game, Honor of Kings, by conducting real-world human-agent tests. Both objective performance and subjective preference results show that the RLHG agent provides participants better gaming experience. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.16444v1-abstract-full').style.display = 'none'; document.getElementById('2401.16444v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at ICLR 2024. arXiv admin note: text overlap with arXiv:2304.11632</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.07525">arXiv:2401.07525</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2401.07525">pdf</a>, <a href="https://arxiv.org/format/2401.07525">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> TAROT: A Hierarchical Framework with Multitask Co-Pretraining on Semi-Structured Data towards Effective Person-Job Fit </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cao%2C+Y">Yihan Cao</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+X">Xu Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Du%2C+L">Lun Du</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+H">Hao Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+Q">Qiang Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Han%2C+S">Shi Han</a>, <a href="/search/cs?searchtype=author&amp;query=Du%2C+Y">Yushu Du</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+Y">Yanbin Kang</a>, <a href="/search/cs?searchtype=author&amp;query=Lu%2C+G">Guangming Lu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Z">Zi Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.07525v2-abstract-short" style="display: inline;"> Person-job fit is an essential part of online recruitment platforms in serving various downstream applications like Job Search and Candidate Recommendation. Recently, pretrained large language models have further enhanced the effectiveness by leveraging richer textual information in user profiles and job descriptions apart from user behavior features and job metadata. However, the general domain-o&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.07525v2-abstract-full').style.display = 'inline'; document.getElementById('2401.07525v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.07525v2-abstract-full" style="display: none;"> Person-job fit is an essential part of online recruitment platforms in serving various downstream applications like Job Search and Candidate Recommendation. Recently, pretrained large language models have further enhanced the effectiveness by leveraging richer textual information in user profiles and job descriptions apart from user behavior features and job metadata. However, the general domain-oriented design struggles to capture the unique structural information within user profiles and job descriptions, leading to a loss of latent semantic correlations. We propose TAROT, a hierarchical multitask co-pretraining framework, to better utilize structural and semantic information for informative text embeddings. TAROT targets semi-structured text in profiles and jobs, and it is co-pretained with multi-grained pretraining tasks to constrain the acquired semantic information at each level. Experiments on a real-world LinkedIn dataset show significant performance improvements, proving its effectiveness in person-job fit tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.07525v2-abstract-full').style.display = 'none'; document.getElementById('2401.07525v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICASSP 2024 camera ready. 5 pages, 1 figure, 3 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.06431">arXiv:2401.06431</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2401.06431">pdf</a>, <a href="https://arxiv.org/format/2401.06431">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Human-AI Collaborative Essay Scoring: A Dual-Process Framework with LLMs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Xiao%2C+C">Changrong Xiao</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+W">Wenxing Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+Q">Qingping Song</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+S+X">Sean Xin Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+K">Kunpeng Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Yufang Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+Q">Qi Fu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.06431v2-abstract-short" style="display: inline;"> Receiving timely and personalized feedback is essential for second-language learners, especially when human instructors are unavailable. This study explores the effectiveness of Large Language Models (LLMs), including both proprietary and open-source models, for Automated Essay Scoring (AES). Through extensive experiments with public and private datasets, we find that while LLMs do not surpass con&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.06431v2-abstract-full').style.display = 'inline'; document.getElementById('2401.06431v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.06431v2-abstract-full" style="display: none;"> Receiving timely and personalized feedback is essential for second-language learners, especially when human instructors are unavailable. This study explores the effectiveness of Large Language Models (LLMs), including both proprietary and open-source models, for Automated Essay Scoring (AES). Through extensive experiments with public and private datasets, we find that while LLMs do not surpass conventional state-of-the-art (SOTA) grading models in performance, they exhibit notable consistency, generalizability, and explainability. We propose an open-source LLM-based AES system, inspired by the dual-process theory. Our system offers accurate grading and high-quality feedback, at least comparable to that of fine-tuned proprietary LLMs, in addition to its ability to alleviate misgrading. Furthermore, we conduct human-AI co-grading experiments with both novice and expert graders. We find that our system not only automates the grading process but also enhances the performance and efficiency of human graders, particularly for essays where the model has lower confidence. These results highlight the potential of LLMs to facilitate effective human-AI collaboration in the educational context, potentially transforming learning experiences through AI-generated feedback. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.06431v2-abstract-full').style.display = 'none'; document.getElementById('2401.06431v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.03835">arXiv:2401.03835</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2401.03835">pdf</a>, <a href="https://arxiv.org/format/2401.03835">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Limitations of Data-Driven Spectral Reconstruction -- Optics-Aware Analysis and Mitigation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Fu%2C+Q">Qiang Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Souza%2C+M">Matheus Souza</a>, <a href="/search/cs?searchtype=author&amp;query=Choi%2C+E">Eunsue Choi</a>, <a href="/search/cs?searchtype=author&amp;query=Shin%2C+S">Suhyun Shin</a>, <a href="/search/cs?searchtype=author&amp;query=Baek%2C+S">Seung-Hwan Baek</a>, <a href="/search/cs?searchtype=author&amp;query=Heidrich%2C+W">Wolfgang Heidrich</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.03835v2-abstract-short" style="display: inline;"> Hyperspectral imaging empowers machine vision systems with the distinct capability of identifying materials through recording their spectral signatures. Recent efforts in data-driven spectral reconstruction aim at extracting spectral information from RGB images captured by cost-effective RGB cameras, instead of dedicated hardware. In this paper we systematically analyze the performance of such m&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.03835v2-abstract-full').style.display = 'inline'; document.getElementById('2401.03835v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.03835v2-abstract-full" style="display: none;"> Hyperspectral imaging empowers machine vision systems with the distinct capability of identifying materials through recording their spectral signatures. Recent efforts in data-driven spectral reconstruction aim at extracting spectral information from RGB images captured by cost-effective RGB cameras, instead of dedicated hardware. In this paper we systematically analyze the performance of such methods, evaluating both the practical limitations with respect to current datasets and overfitting, as well as fundamental limitations with respect to the nature of the information encoded in the RGB images, and the dependency of this information on the optical system of the camera. We find that, the current models are not robust under slight variations, e.g., in noise level or compression of the RGB file. Without modeling underrepresented spectral content, existing datasets and the models trained on them are limited in their ability to cope with challenging metameric colors. To mitigate this issue, we propose to exploit the combination of metameric data augmentation and optical lens aberrations to improve the encoding of the metameric information into the RGB image, which paves the road towards higher performing spectral imaging and reconstruction approaches. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.03835v2-abstract-full').style.display = 'none'; document.getElementById('2401.03835v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 7 figures, 8 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.00010">arXiv:2401.00010</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2401.00010">pdf</a>, <a href="https://arxiv.org/format/2401.00010">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Professional Network Matters: Connections Empower Person-Job Fit </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chen%2C+H">Hao Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Du%2C+L">Lun Du</a>, <a href="/search/cs?searchtype=author&amp;query=Lu%2C+Y">Yuxuan Lu</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+Q">Qiang Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+X">Xu Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Han%2C+S">Shi Han</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+Y">Yanbin Kang</a>, <a href="/search/cs?searchtype=author&amp;query=Lu%2C+G">Guangming Lu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Z">Zi Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.00010v1-abstract-short" style="display: inline;"> Online recruitment platforms typically employ Person-Job Fit models in the core service that automatically match suitable job seekers with appropriate job positions. While existing works leverage historical or contextual information, they often disregard a crucial aspect: job seekers&#39; social relationships in professional networks. This paper emphasizes the importance of incorporating professional&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.00010v1-abstract-full').style.display = 'inline'; document.getElementById('2401.00010v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.00010v1-abstract-full" style="display: none;"> Online recruitment platforms typically employ Person-Job Fit models in the core service that automatically match suitable job seekers with appropriate job positions. While existing works leverage historical or contextual information, they often disregard a crucial aspect: job seekers&#39; social relationships in professional networks. This paper emphasizes the importance of incorporating professional networks into the Person-Job Fit model. Our innovative approach consists of two stages: (1) defining a Workplace Heterogeneous Information Network (WHIN) to capture heterogeneous knowledge, including professional connections and pre-training representations of various entities using a heterogeneous graph neural network; (2) designing a Contextual Social Attention Graph Neural Network (CSAGNN) that supplements users&#39; missing information with professional connections&#39; contextual information. We introduce a job-specific attention mechanism in CSAGNN to handle noisy professional networks, leveraging pre-trained entity representations from WHIN. We demonstrate the effectiveness of our approach through experimental evaluations conducted across three real-world recruitment datasets from LinkedIn, showing superior performance compared to baseline models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.00010v1-abstract-full').style.display = 'none'; document.getElementById('2401.00010v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at WSDM 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.14472">arXiv:2312.14472</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2312.14472">pdf</a>, <a href="https://arxiv.org/format/2312.14472">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Not All Tasks Are Equally Difficult: Multi-Task Deep Reinforcement Learning with Dynamic Depth Routing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=He%2C+J">Jinmin He</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+K">Kai Li</a>, <a href="/search/cs?searchtype=author&amp;query=Zang%2C+Y">Yifan Zang</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+H">Haobo Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+Q">Qiang Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Xing%2C+J">Junliang Xing</a>, <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+J">Jian Cheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.14472v2-abstract-short" style="display: inline;"> Multi-task reinforcement learning endeavors to accomplish a set of different tasks with a single policy. To enhance data efficiency by sharing parameters across multiple tasks, a common practice segments the network into distinct modules and trains a routing network to recombine these modules into task-specific policies. However, existing routing approaches employ a fixed number of modules for all&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.14472v2-abstract-full').style.display = 'inline'; document.getElementById('2312.14472v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.14472v2-abstract-full" style="display: none;"> Multi-task reinforcement learning endeavors to accomplish a set of different tasks with a single policy. To enhance data efficiency by sharing parameters across multiple tasks, a common practice segments the network into distinct modules and trains a routing network to recombine these modules into task-specific policies. However, existing routing approaches employ a fixed number of modules for all tasks, neglecting that tasks with varying difficulties commonly require varying amounts of knowledge. This work presents a Dynamic Depth Routing (D2R) framework, which learns strategic skipping of certain intermediate modules, thereby flexibly choosing different numbers of modules for each task. Under this framework, we further introduce a ResRouting method to address the issue of disparate routing paths between behavior and target policies during off-policy training. In addition, we design an automatic route-balancing mechanism to encourage continued routing exploration for unmastered tasks without disturbing the routing of mastered ones. We conduct extensive experiments on various robotics manipulation tasks in the Meta-World benchmark, where D2R achieves state-of-the-art performance with significantly improved learning efficiency. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.14472v2-abstract-full').style.display = 'none'; document.getElementById('2312.14472v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">AAAI2024, with supplementary material</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> 38th AAAI Conference on Artificial Intelligence (AAAI2024), Vancouver, BC, Canada, 2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.11537">arXiv:2312.11537</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2312.11537">pdf</a>, <a href="https://arxiv.org/format/2312.11537">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> </div> </div> <p class="title is-5 mathjax"> FastSR-NeRF: Improving NeRF Efficiency on Consumer Devices with A Simple Super-Resolution Pipeline </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lin%2C+C">Chien-Yu Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+Q">Qichen Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Merth%2C+T">Thomas Merth</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+K">Karren Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Ranjan%2C+A">Anurag Ranjan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.11537v2-abstract-short" style="display: inline;"> Super-resolution (SR) techniques have recently been proposed to upscale the outputs of neural radiance fields (NeRF) and generate high-quality images with enhanced inference speeds. However, existing NeRF+SR methods increase training overhead by using extra input features, loss functions, and/or expensive training procedures such as knowledge distillation. In this paper, we aim to leverage SR for&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.11537v2-abstract-full').style.display = 'inline'; document.getElementById('2312.11537v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.11537v2-abstract-full" style="display: none;"> Super-resolution (SR) techniques have recently been proposed to upscale the outputs of neural radiance fields (NeRF) and generate high-quality images with enhanced inference speeds. However, existing NeRF+SR methods increase training overhead by using extra input features, loss functions, and/or expensive training procedures such as knowledge distillation. In this paper, we aim to leverage SR for efficiency gains without costly training or architectural changes. Specifically, we build a simple NeRF+SR pipeline that directly combines existing modules, and we propose a lightweight augmentation technique, random patch sampling, for training. Compared to existing NeRF+SR methods, our pipeline mitigates the SR computing overhead and can be trained up to 23x faster, making it feasible to run on consumer devices such as the Apple MacBook. Experiments show our pipeline can upscale NeRF outputs by 2-4x while maintaining high quality, increasing inference speeds by up to 18x on an NVIDIA V100 GPU and 12.8x on an M1 Pro chip. We conclude that SR can be a simple but effective technique for improving the efficiency of NeRF models for consumer devices. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.11537v2-abstract-full').style.display = 'none'; document.getElementById('2312.11537v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">WACV 2024 (Oral)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.05639">arXiv:2312.05639</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2312.05639">pdf</a>, <a href="https://arxiv.org/format/2312.05639">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Performance">cs.PF</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Programming Languages">cs.PL</span> </div> </div> <p class="title is-5 mathjax"> JITSPMM: Just-in-Time Instruction Generation for Accelerated Sparse Matrix-Matrix Multiplication </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Fu%2C+Q">Qiang Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Rolinger%2C+T+B">Thomas B. Rolinger</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+H+H">H. Howie Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.05639v1-abstract-short" style="display: inline;"> Achieving high performance for Sparse MatrixMatrix Multiplication (SpMM) has received increasing research attention, especially on multi-core CPUs, due to the large input data size in applications such as graph neural networks (GNNs). Most existing solutions for SpMM computation follow the aheadof-time (AOT) compilation approach, which compiles a program entirely before it is executed. AOT compila&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.05639v1-abstract-full').style.display = 'inline'; document.getElementById('2312.05639v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.05639v1-abstract-full" style="display: none;"> Achieving high performance for Sparse MatrixMatrix Multiplication (SpMM) has received increasing research attention, especially on multi-core CPUs, due to the large input data size in applications such as graph neural networks (GNNs). Most existing solutions for SpMM computation follow the aheadof-time (AOT) compilation approach, which compiles a program entirely before it is executed. AOT compilation for SpMM faces three key limitations: unnecessary memory access, additional branch overhead, and redundant instructions. These limitations stem from the fact that crucial information pertaining to SpMM is not known until runtime. In this paper, we propose JITSPMM, a just-in-time (JIT) assembly code generation framework to accelerated SpMM computation on multi-core CPUs with SIMD extensions. First, JITSPMM integrates the JIT assembly code generation technique into three widely-used workload division methods for SpMM to achieve balanced workload distribution among CPU threads. Next, with the availability of runtime information, JITSPMM employs a novel technique, coarse-grain column merging, to maximize instruction-level parallelism by unrolling the performance-critical loop. Furthermore, JITSPMM intelligently allocates registers to cache frequently accessed data to minimizing memory accesses, and employs selected SIMD instructions to enhance arithmetic throughput. We conduct a performance evaluation of JITSPMM and compare it two AOT baselines. The first involves existing SpMM implementations compiled using the Intel icc compiler with auto-vectorization. The second utilizes the highly-optimized SpMM routine provided by Intel MKL. Our results show that JITSPMM provides an average improvement of 3.8x and 1.4x, respectively. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.05639v1-abstract-full').style.display = 'none'; document.getElementById('2312.05639v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.10261">arXiv:2311.10261</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2311.10261">pdf</a>, <a href="https://arxiv.org/format/2311.10261">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Vision meets mmWave Radar: 3D Object Perception Benchmark for Autonomous Driving </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Yizhou Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+J">Jen-Hao Cheng</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+J">Jui-Te Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Kuan%2C+S">Sheng-Yao Kuan</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+Q">Qiqian Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Ni%2C+C">Chiming Ni</a>, <a href="/search/cs?searchtype=author&amp;query=Hao%2C+S">Shengyu Hao</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+G">Gaoang Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Xing%2C+G">Guanbin Xing</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+H">Hui Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Hwang%2C+J">Jenq-Neng Hwang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.10261v1-abstract-short" style="display: inline;"> Sensor fusion is crucial for an accurate and robust perception system on autonomous vehicles. Most existing datasets and perception solutions focus on fusing cameras and LiDAR. However, the collaboration between camera and radar is significantly under-exploited. The incorporation of rich semantic information from the camera, and reliable 3D information from the radar can potentially achieve an eff&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.10261v1-abstract-full').style.display = 'inline'; document.getElementById('2311.10261v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.10261v1-abstract-full" style="display: none;"> Sensor fusion is crucial for an accurate and robust perception system on autonomous vehicles. Most existing datasets and perception solutions focus on fusing cameras and LiDAR. However, the collaboration between camera and radar is significantly under-exploited. The incorporation of rich semantic information from the camera, and reliable 3D information from the radar can potentially achieve an efficient, cheap, and portable solution for 3D object perception tasks. It can also be robust to different lighting or all-weather driving scenarios due to the capability of mmWave radars. In this paper, we introduce the CRUW3D dataset, including 66K synchronized and well-calibrated camera, radar, and LiDAR frames in various driving scenarios. Unlike other large-scale autonomous driving datasets, our radar data is in the format of radio frequency (RF) tensors that contain not only 3D location information but also spatio-temporal semantic information. This kind of radar format can enable machine learning models to generate more reliable object perception results after interacting and fusing the information or features between the camera and radar. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.10261v1-abstract-full').style.display = 'none'; document.getElementById('2311.10261v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.08080">arXiv:2310.08080</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2310.08080">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> RT-SRTS: Angle-Agnostic Real-Time Simultaneous 3D Reconstruction and Tumor Segmentation from Single X-Ray Projection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+M">Miao Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+Q">Qiming Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+B">Bo Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+M">Mengxi Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+B">Bojian Li</a>, <a href="/search/cs?searchtype=author&amp;query=Luo%2C+X">Xiaoyan Luo</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+F">Fugen Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.08080v2-abstract-short" style="display: inline;"> Radiotherapy is one of the primary treatment methods for tumors, but the organ movement caused by respiration limits its accuracy. Recently, 3D imaging from a single X-ray projection has received extensive attention as a promising approach to address this issue. However, current methods can only reconstruct 3D images without directly locating the tumor and are only validated for fixed-angle imagin&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.08080v2-abstract-full').style.display = 'inline'; document.getElementById('2310.08080v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.08080v2-abstract-full" style="display: none;"> Radiotherapy is one of the primary treatment methods for tumors, but the organ movement caused by respiration limits its accuracy. Recently, 3D imaging from a single X-ray projection has received extensive attention as a promising approach to address this issue. However, current methods can only reconstruct 3D images without directly locating the tumor and are only validated for fixed-angle imaging, which fails to fully meet the requirements of motion control in radiotherapy. In this study, a novel imaging method RT-SRTS is proposed which integrates 3D imaging and tumor segmentation into one network based on multi-task learning (MTL) and achieves real-time simultaneous 3D reconstruction and tumor segmentation from a single X-ray projection at any angle. Furthermore, the attention enhanced calibrator (AEC) and uncertain-region elaboration (URE) modules have been proposed to aid feature extraction and improve segmentation accuracy. The proposed method was evaluated on fifteen patient cases and compared with three state-of-the-art methods. It not only delivers superior 3D reconstruction but also demonstrates commendable tumor segmentation results. Simultaneous reconstruction and segmentation can be completed in approximately 70 ms, significantly faster than the required time threshold for real-time tumor tracking. The efficacies of both AEC and URE have also been validated in ablation studies. The code of work is available at https://github.com/ZywooSimple/RT-SRTS. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.08080v2-abstract-full').style.display = 'none'; document.getElementById('2310.08080v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.06648">arXiv:2310.06648</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2310.06648">pdf</a>, <a href="https://arxiv.org/format/2310.06648">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> </div> </div> <p class="title is-5 mathjax"> Diversity from Human Feedback </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wang%2C+R">Ren-Jian Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Xue%2C+K">Ke Xue</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Yutong Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+P">Peng Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+H">Haobo Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+Q">Qiang Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Qian%2C+C">Chao Qian</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.06648v2-abstract-short" style="display: inline;"> Diversity plays a significant role in many problems, such as ensemble learning, reinforcement learning, and combinatorial optimization. How to define the diversity measure is a longstanding problem. Many methods rely on expert experience to define a proper behavior space and then obtain the diversity measure, which is, however, challenging in many scenarios. In this paper, we propose the problem o&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.06648v2-abstract-full').style.display = 'inline'; document.getElementById('2310.06648v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.06648v2-abstract-full" style="display: none;"> Diversity plays a significant role in many problems, such as ensemble learning, reinforcement learning, and combinatorial optimization. How to define the diversity measure is a longstanding problem. Many methods rely on expert experience to define a proper behavior space and then obtain the diversity measure, which is, however, challenging in many scenarios. In this paper, we propose the problem of learning a behavior space from human feedback and present a general method called Diversity from Human Feedback (DivHF) to solve it. DivHF learns a behavior descriptor consistent with human preference by querying human feedback. The learned behavior descriptor can be combined with any distance measure to define a diversity measure. We demonstrate the effectiveness of DivHF by integrating it with the Quality-Diversity optimization algorithm MAP-Elites and conducting experiments on the QDax suite. The results show that DivHF learns a behavior space that aligns better with human requirements compared to direct data-driven approaches and leads to more diverse solutions under human preference. Our contributions include formulating the problem, proposing the DivHF method, and demonstrating its effectiveness through experiments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.06648v2-abstract-full').style.display = 'none'; document.getElementById('2310.06648v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 10 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.14623">arXiv:2309.14623</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2309.14623">pdf</a>, <a href="https://arxiv.org/format/2309.14623">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Text-to-Image Generation for Abstract Concepts </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Liao%2C+J">Jiayi Liao</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+X">Xu Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+Q">Qiang Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Du%2C+L">Lun Du</a>, <a href="/search/cs?searchtype=author&amp;query=He%2C+X">Xiangnan He</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+X">Xiang Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Han%2C+S">Shi Han</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+D">Dongmei Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.14623v2-abstract-short" style="display: inline;"> Recent years have witnessed the substantial progress of large-scale models across various domains, such as natural language processing and computer vision, facilitating the expression of concrete concepts. Unlike concrete concepts that are usually directly associated with physical objects, expressing abstract concepts through natural language requires considerable effort, which results from their&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.14623v2-abstract-full').style.display = 'inline'; document.getElementById('2309.14623v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.14623v2-abstract-full" style="display: none;"> Recent years have witnessed the substantial progress of large-scale models across various domains, such as natural language processing and computer vision, facilitating the expression of concrete concepts. Unlike concrete concepts that are usually directly associated with physical objects, expressing abstract concepts through natural language requires considerable effort, which results from their intricate semantics and connotations. An alternative approach is to leverage images to convey rich visual information as a supplement. Nevertheless, existing Text-to-Image (T2I) models are primarily trained on concrete physical objects and tend to fail to visualize abstract concepts. Inspired by the three-layer artwork theory that identifies critical factors, intent, object and form during artistic creation, we propose a framework of Text-to-Image generation for Abstract Concepts (TIAC). The abstract concept is clarified into a clear intent with a detailed definition to avoid ambiguity. LLMs then transform it into semantic-related physical objects, and the concept-dependent form is retrieved from an LLM-extracted form pattern set. Information from these three aspects will be integrated to generate prompts for T2I models via LLM. Evaluation results from human assessments and our newly designed metric concept score demonstrate the effectiveness of our framework in creating images that can sufficiently express abstract concepts. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.14623v2-abstract-full').style.display = 'none'; document.getElementById('2309.14623v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.09083">arXiv:2309.09083</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2309.09083">pdf</a>, <a href="https://arxiv.org/ps/2309.09083">ps</a>, <a href="https://arxiv.org/format/2309.09083">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> FrameRS: A Video Frame Compression Model Composed by Self supervised Video Frame Reconstructor and Key Frame Selector </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Fu%2C+Q">Qiqian Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+G">Guanhong Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+G">Gaoang Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.09083v1-abstract-short" style="display: inline;"> In this paper, we present frame reconstruction model: FrameRS. It consists self-supervised video frame reconstructor and key frame selector. The frame reconstructor, FrameMAE, is developed by adapting the principles of the Masked Autoencoder for Images (MAE) for video context. The key frame selector, Frame Selector, is built on CNN architecture. By taking the high-level semantic information from t&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.09083v1-abstract-full').style.display = 'inline'; document.getElementById('2309.09083v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.09083v1-abstract-full" style="display: none;"> In this paper, we present frame reconstruction model: FrameRS. It consists self-supervised video frame reconstructor and key frame selector. The frame reconstructor, FrameMAE, is developed by adapting the principles of the Masked Autoencoder for Images (MAE) for video context. The key frame selector, Frame Selector, is built on CNN architecture. By taking the high-level semantic information from the encoder of FrameMAE as its input, it can predicted the key frames with low computation costs. Integrated with our bespoke Frame Selector, FrameMAE can effectively compress a video clip by retaining approximately 30% of its pivotal frames. Performance-wise, our model showcases computational efficiency and competitive accuracy, marking a notable improvement over traditional Key Frame Extract algorithms. The implementation is available on Github <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.09083v1-abstract-full').style.display = 'none'; document.getElementById('2309.09083v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.08673">arXiv:2309.08673</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2309.08673">pdf</a>, <a href="https://arxiv.org/ps/2309.08673">ps</a>, <a href="https://arxiv.org/format/2309.08673">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Programming Languages">cs.PL</span> </div> </div> <p class="title is-5 mathjax"> A Two-Level Linear Dependent Type Theory </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Fu%2C+Q">Qiancheng Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Xi%2C+H">Hongwei Xi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.08673v1-abstract-short" style="display: inline;"> We present a type theory combining both linearity and dependency by stratifying typing rules into a level for logics and a level for programs. The distinction between logics and programs decouples their semantics, allowing the type system to assume tight resource bounds. A natural notion of irrelevancy is established where all proofs and types occurring inside programs are fully erasable without c&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.08673v1-abstract-full').style.display = 'inline'; document.getElementById('2309.08673v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.08673v1-abstract-full" style="display: none;"> We present a type theory combining both linearity and dependency by stratifying typing rules into a level for logics and a level for programs. The distinction between logics and programs decouples their semantics, allowing the type system to assume tight resource bounds. A natural notion of irrelevancy is established where all proofs and types occurring inside programs are fully erasable without compromising their operational behavior. Through a heap-based operational semantics, we show that extracted programs always make computational progress and run memory clean. Additionally, programs can be freely reflected into the logical level for conducting deep proofs in the style of standard dependent type theories. This enables one to write resource safe programs and verify their correctness using a unified language. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.08673v1-abstract-full').style.display = 'none'; document.getElementById('2309.08673v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.00964">arXiv:2309.00964</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2309.00964">pdf</a>, <a href="https://arxiv.org/format/2309.00964">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> eDKM: An Efficient and Accurate Train-time Weight Clustering for Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cho%2C+M">Minsik Cho</a>, <a href="/search/cs?searchtype=author&amp;query=Vahid%2C+K+A">Keivan A. Vahid</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+Q">Qichen Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Adya%2C+S">Saurabh Adya</a>, <a href="/search/cs?searchtype=author&amp;query=Del+Mundo%2C+C+C">Carlo C Del Mundo</a>, <a href="/search/cs?searchtype=author&amp;query=Rastegari%2C+M">Mohammad Rastegari</a>, <a href="/search/cs?searchtype=author&amp;query=Naik%2C+D">Devang Naik</a>, <a href="/search/cs?searchtype=author&amp;query=Zatloukal%2C+P">Peter Zatloukal</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.00964v2-abstract-short" style="display: inline;"> Since Large Language Models or LLMs have demonstrated high-quality performance on many complex language tasks, there is a great interest in bringing these LLMs to mobile devices for faster responses and better privacy protection. However, the size of LLMs (i.e., billions of parameters) requires highly effective compression to fit into storage-limited devices. Among many compression techniques, wei&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.00964v2-abstract-full').style.display = 'inline'; document.getElementById('2309.00964v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.00964v2-abstract-full" style="display: none;"> Since Large Language Models or LLMs have demonstrated high-quality performance on many complex language tasks, there is a great interest in bringing these LLMs to mobile devices for faster responses and better privacy protection. However, the size of LLMs (i.e., billions of parameters) requires highly effective compression to fit into storage-limited devices. Among many compression techniques, weight-clustering, a form of non-linear quantization, is one of the leading candidates for LLM compression, and supported by modern smartphones. Yet, its training overhead is prohibitively significant for LLM fine-tuning. Especially, Differentiable KMeans Clustering, or DKM, has shown the state-of-the-art trade-off between compression ratio and accuracy regression, but its large memory complexity makes it nearly impossible to apply to train-time LLM compression. In this paper, we propose a memory-efficient DKM implementation, eDKM powered by novel techniques to reduce the memory footprint of DKM by orders of magnitudes. For a given tensor to be saved on CPU for the backward pass of DKM, we compressed the tensor by applying uniquification and sharding after checking if there is no duplicated tensor previously copied to CPU. Our experimental results demonstrate that \prjname can fine-tune and compress a pretrained LLaMA 7B model from 12.6 GB to 2.5 GB (3bit/weight) with the Alpaca dataset by reducing the train-time memory footprint of a decoder layer by 130$\times$, while delivering good accuracy on broader LLM benchmarks (i.e., 77.7% for PIQA, 66.1% for Winograde, and so on). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.00964v2-abstract-full').style.display = 'none'; document.getElementById('2309.00964v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 2 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">preprint</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.07085">arXiv:2308.07085</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2308.07085">pdf</a>, <a href="https://arxiv.org/format/2308.07085">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> Hue: A User-Adaptive Parser for Hybrid Logs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Xu%2C+J">Junjielong Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+Q">Qiuai Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+Z">Zhouruixing Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+Y">Yutong Cheng</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Z">Zhijing Li</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+Y">Yuchi Ma</a>, <a href="/search/cs?searchtype=author&amp;query=He%2C+P">Pinjia He</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.07085v1-abstract-short" style="display: inline;"> Log parsing, which extracts log templates from semi-structured logs and produces structured logs, is the first and the most critical step in automated log analysis. While existing log parsers have achieved decent results, they suffer from two major limitations by design. First, they do not natively support hybrid logs that consist of both single-line logs and multi-line logs (\eg Java Exception an&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.07085v1-abstract-full').style.display = 'inline'; document.getElementById('2308.07085v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.07085v1-abstract-full" style="display: none;"> Log parsing, which extracts log templates from semi-structured logs and produces structured logs, is the first and the most critical step in automated log analysis. While existing log parsers have achieved decent results, they suffer from two major limitations by design. First, they do not natively support hybrid logs that consist of both single-line logs and multi-line logs (\eg Java Exception and Hadoop Counters). Second, they fall short in integrating domain knowledge in parsing, making it hard to identify ambiguous tokens in logs. This paper defines a new research problem, \textit{hybrid log parsing}, as a superset of traditional log parsing tasks, and proposes \textit{Hue}, the first attempt for hybrid log parsing via a user-adaptive manner. Specifically, Hue converts each log message to a sequence of special wildcards using a key casting table and determines the log types via line aggregating and pattern extracting. In addition, Hue can effectively utilize user feedback via a novel merge-reject strategy, making it possible to quickly adapt to complex and changing log templates. We evaluated Hue on three hybrid log datasets and sixteen widely-used single-line log datasets (\ie Loghub). The results show that Hue achieves an average grouping accuracy of 0.845 on hybrid logs, which largely outperforms the best results (0.563 on average) obtained by existing parsers. Hue also exhibits SOTA performance on single-line log datasets. Furthermore, Hue has been successfully deployed in a real production environment for daily hybrid log parsing. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.07085v1-abstract-full').style.display = 'none'; document.getElementById('2308.07085v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ESEC/FSE 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.07708">arXiv:2307.07708</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2307.07708">pdf</a>, <a href="https://arxiv.org/format/2307.07708">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> PSGformer: Enhancing 3D Point Cloud Instance Segmentation via Precise Semantic Guidance </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Pan%2C+L">Lei Pan</a>, <a href="/search/cs?searchtype=author&amp;query=Luan%2C+W">Wuyang Luan</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+Y">Yuan Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+Q">Qiang Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Junhui Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.07708v1-abstract-short" style="display: inline;"> Most existing 3D instance segmentation methods are derived from 3D semantic segmentation models. However, these indirect approaches suffer from certain limitations. They fail to fully leverage global and local semantic information for accurate prediction, which hampers the overall performance of the 3D instance segmentation framework. To address these issues, this paper presents PSGformer, a novel&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.07708v1-abstract-full').style.display = 'inline'; document.getElementById('2307.07708v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.07708v1-abstract-full" style="display: none;"> Most existing 3D instance segmentation methods are derived from 3D semantic segmentation models. However, these indirect approaches suffer from certain limitations. They fail to fully leverage global and local semantic information for accurate prediction, which hampers the overall performance of the 3D instance segmentation framework. To address these issues, this paper presents PSGformer, a novel 3D instance segmentation network. PSGformer incorporates two key advancements to enhance the performance of 3D instance segmentation. Firstly, we propose a Multi-Level Semantic Aggregation Module, which effectively captures scene features by employing foreground point filtering and multi-radius aggregation. This module enables the acquisition of more detailed semantic information from global and local perspectives. Secondly, PSGformer introduces a Parallel Feature Fusion Transformer Module that independently processes super-point features and aggregated features using transformers. The model achieves a more comprehensive feature representation by the features which connect global and local features. We conducted extensive experiments on the ScanNetv2 dataset. Notably, PSGformer exceeds compared state-of-the-art methods by 2.2% on ScanNetv2 hidden test set in terms of mAP. Our code and models will be publicly released. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.07708v1-abstract-full').style.display = 'none'; document.getElementById('2307.07708v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.04349">arXiv:2307.04349</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2307.04349">pdf</a>, <a href="https://arxiv.org/format/2307.04349">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> RLTF: Reinforcement Learning from Unit Test Feedback </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Liu%2C+J">Jiate Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+Y">Yiqin Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Xiao%2C+K">Kaiwen Xiao</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+Q">Qiang Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Han%2C+X">Xiao Han</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+W">Wei Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Ye%2C+D">Deheng Ye</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.04349v2-abstract-short" style="display: inline;"> The goal of program synthesis, or code generation, is to generate executable code based on given descriptions. Recently, there has been an increasing number of studies employing reinforcement learning (RL) to improve the performance of large language models (LLMs) for code. However, current representative works either rely solely on offline frameworks, limiting the exploration of new sample spaces&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.04349v2-abstract-full').style.display = 'inline'; document.getElementById('2307.04349v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.04349v2-abstract-full" style="display: none;"> The goal of program synthesis, or code generation, is to generate executable code based on given descriptions. Recently, there has been an increasing number of studies employing reinforcement learning (RL) to improve the performance of large language models (LLMs) for code. However, current representative works either rely solely on offline frameworks, limiting the exploration of new sample spaces, or fall short in the utilization of unit test signals, not accounting for specific error locations within the code. To address these issues, we propose RLTF, i.e., Reinforcement Learning from Unit Test Feedback, a novel online RL framework with unit test feedback of multi-granularity for refining code LLMs. Our approach generates data in real-time during training and simultaneously utilizes fine-grained feedback signals to guide the model towards producing higher-quality code. Extensive experiments show that RLTF achieves state-of-the-art performance on the APPS and the MBPP benchmarks. Our code is available at: https://github.com/Zyq-scut/RLTF. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.04349v2-abstract-full').style.display = 'none'; document.getElementById('2307.04349v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 10 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by TMLR</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.16884">arXiv:2306.16884</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2306.16884">pdf</a>, <a href="https://arxiv.org/format/2306.16884">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> </div> </div> <p class="title is-5 mathjax"> Policy Space Diversity for Non-Transitive Games </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yao%2C+J">Jian Yao</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+W">Weiming Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+H">Haobo Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+Y">Yaodong Yang</a>, <a href="/search/cs?searchtype=author&amp;query=McAleer%2C+S">Stephen McAleer</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+Q">Qiang Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+W">Wei Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.16884v2-abstract-short" style="display: inline;"> Policy-Space Response Oracles (PSRO) is an influential algorithm framework for approximating a Nash Equilibrium (NE) in multi-agent non-transitive games. Many previous studies have been trying to promote policy diversity in PSRO. A major weakness in existing diversity metrics is that a more diverse (according to their diversity metrics) population does not necessarily mean (as we proved in the pap&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.16884v2-abstract-full').style.display = 'inline'; document.getElementById('2306.16884v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.16884v2-abstract-full" style="display: none;"> Policy-Space Response Oracles (PSRO) is an influential algorithm framework for approximating a Nash Equilibrium (NE) in multi-agent non-transitive games. Many previous studies have been trying to promote policy diversity in PSRO. A major weakness in existing diversity metrics is that a more diverse (according to their diversity metrics) population does not necessarily mean (as we proved in the paper) a better approximation to a NE. To alleviate this problem, we propose a new diversity metric, the improvement of which guarantees a better approximation to a NE. Meanwhile, we develop a practical and well-justified method to optimize our diversity metric using only state-action samples. By incorporating our diversity regularization into the best response solving in PSRO, we obtain a new PSRO variant, Policy Space Diversity PSRO (PSD-PSRO). We present the convergence property of PSD-PSRO. Empirically, extensive experiments on various games demonstrate that PSD-PSRO is more effective in producing significantly less exploitable policies than state-of-the-art PSRO variants. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.16884v2-abstract-full').style.display = 'none'; document.getElementById('2306.16884v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.10715">arXiv:2306.10715</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2306.10715">pdf</a>, <a href="https://arxiv.org/format/2306.10715">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Maximum Entropy Heterogeneous-Agent Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Liu%2C+J">Jiarong Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhong%2C+Y">Yifan Zhong</a>, <a href="/search/cs?searchtype=author&amp;query=Hu%2C+S">Siyi Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+H">Haobo Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+Q">Qiang Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Chang%2C+X">Xiaojun Chang</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+Y">Yaodong Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.10715v4-abstract-short" style="display: inline;"> Multi-agent reinforcement learning (MARL) has been shown effective for cooperative games in recent years. However, existing state-of-the-art methods face challenges related to sample complexity, training instability, and the risk of converging to a suboptimal Nash Equilibrium. In this paper, we propose a unified framework for learning \emph{stochastic} policies to resolve these issues. We embed co&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.10715v4-abstract-full').style.display = 'inline'; document.getElementById('2306.10715v4-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.10715v4-abstract-full" style="display: none;"> Multi-agent reinforcement learning (MARL) has been shown effective for cooperative games in recent years. However, existing state-of-the-art methods face challenges related to sample complexity, training instability, and the risk of converging to a suboptimal Nash Equilibrium. In this paper, we propose a unified framework for learning \emph{stochastic} policies to resolve these issues. We embed cooperative MARL problems into probabilistic graphical models, from which we derive the maximum entropy (MaxEnt) objective for MARL. Based on the MaxEnt framework, we propose Heterogeneous-Agent Soft Actor-Critic (HASAC) algorithm. Theoretically, we prove the monotonic improvement and convergence to quantal response equilibrium (QRE) properties of HASAC. Furthermore, we generalize a unified template for MaxEnt algorithmic design named Maximum Entropy Heterogeneous-Agent Mirror Learning (MEHAML), which provides any induced method with the same guarantees as HASAC. We evaluate HASAC on six benchmarks: Bi-DexHands, Multi-Agent MuJoCo, StarCraft Multi-Agent Challenge, Google Research Football, Multi-Agent Particle Environment, and Light Aircraft Game. Results show that HASAC consistently outperforms strong baselines, exhibiting better sample efficiency, robustness, and sufficient exploration. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.10715v4-abstract-full').style.display = 'none'; document.getElementById('2306.10715v4-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICLR 2024 spotlight</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.03624">arXiv:2306.03624</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2306.03624">pdf</a>, <a href="https://arxiv.org/format/2306.03624">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> On Manipulating Signals of User-Item Graph: A Jacobi Polynomial-based Graph Collaborative Filtering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Guo%2C+J">Jiayan Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Du%2C+L">Lun Du</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+X">Xu Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+X">Xiaojun Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+Q">Qiang Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Han%2C+S">Shi Han</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+D">Dongmei Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Y">Yan Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.03624v1-abstract-short" style="display: inline;"> Collaborative filtering (CF) is an important research direction in recommender systems that aims to make recommendations given the information on user-item interactions. Graph CF has attracted more and more attention in recent years due to its effectiveness in leveraging high-order information in the user-item bipartite graph for better recommendations. Specifically, recent studies show the succes&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.03624v1-abstract-full').style.display = 'inline'; document.getElementById('2306.03624v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.03624v1-abstract-full" style="display: none;"> Collaborative filtering (CF) is an important research direction in recommender systems that aims to make recommendations given the information on user-item interactions. Graph CF has attracted more and more attention in recent years due to its effectiveness in leveraging high-order information in the user-item bipartite graph for better recommendations. Specifically, recent studies show the success of graph neural networks (GNN) for CF is attributed to its low-pass filtering effects. However, current researches lack a study of how different signal components contributes to recommendations, and how to design strategies to properly use them well. To this end, from the view of spectral transformation, we analyze the important factors that a graph filter should consider to achieve better performance. Based on the discoveries, we design JGCF, an efficient and effective method for CF based on Jacobi polynomial bases and frequency decomposition strategies. Extensive experiments on four widely used public datasets show the effectiveness and efficiency of the proposed methods, which brings at most 27.06% performance gain on Alibaba-iFashion. Besides, the experimental results also show that JGCF is better at handling sparse datasets, which shows potential in making recommendations for cold-start users. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.03624v1-abstract-full').style.display = 'none'; document.getElementById('2306.03624v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.17185">arXiv:2305.17185</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2305.17185">pdf</a>, <a href="https://arxiv.org/format/2305.17185">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optics">physics.optics</span> </div> </div> <p class="title is-5 mathjax"> Image Quality Is Not All You Want: Task-Driven Lens Design for Image Classification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yang%2C+X">Xinge Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+Q">Qiang Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Nie%2C+Y">Yunfeng Nie</a>, <a href="/search/cs?searchtype=author&amp;query=Heidrich%2C+W">Wolfgang Heidrich</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.17185v1-abstract-short" style="display: inline;"> In computer vision, it has long been taken for granted that high-quality images obtained through well-designed camera lenses would lead to superior results. However, we find that this common perception is not a &#34;one-size-fits-all&#34; solution for diverse computer vision tasks. We demonstrate that task-driven and deep-learned simple optics can actually deliver better visual task performance. The Task-&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.17185v1-abstract-full').style.display = 'inline'; document.getElementById('2305.17185v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.17185v1-abstract-full" style="display: none;"> In computer vision, it has long been taken for granted that high-quality images obtained through well-designed camera lenses would lead to superior results. However, we find that this common perception is not a &#34;one-size-fits-all&#34; solution for diverse computer vision tasks. We demonstrate that task-driven and deep-learned simple optics can actually deliver better visual task performance. The Task-Driven lens design approach, which relies solely on a well-trained network model for supervision, is proven to be capable of designing lenses from scratch. Experimental results demonstrate the designed image classification lens (``TaskLens&#39;&#39;) exhibits higher accuracy compared to conventional imaging-driven lenses, even with fewer lens elements. Furthermore, we show that our TaskLens is compatible with various network models while maintaining enhanced classification accuracy. We propose that TaskLens holds significant potential, particularly when physical dimensions and cost are severely constrained. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.17185v1-abstract-full').style.display = 'none'; document.getElementById('2305.17185v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Use an image classification network to supervise the lens design from scratch. The final designs can achieve higher accuracy with fewer optical elements</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.16683">arXiv:2305.16683</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2305.16683">pdf</a>, <a href="https://arxiv.org/format/2305.16683">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Future-conditioned Unsupervised Pretraining for Decision Transformer </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Xie%2C+Z">Zhihui Xie</a>, <a href="/search/cs?searchtype=author&amp;query=Lin%2C+Z">Zichuan Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Ye%2C+D">Deheng Ye</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+Q">Qiang Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+W">Wei Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+S">Shuai Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.16683v1-abstract-short" style="display: inline;"> Recent research in offline reinforcement learning (RL) has demonstrated that return-conditioned supervised learning is a powerful paradigm for decision-making problems. While promising, return conditioning is limited to training data labeled with rewards and therefore faces challenges in learning from unsupervised data. In this work, we aim to utilize generalized future conditioning to enable effi&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.16683v1-abstract-full').style.display = 'inline'; document.getElementById('2305.16683v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.16683v1-abstract-full" style="display: none;"> Recent research in offline reinforcement learning (RL) has demonstrated that return-conditioned supervised learning is a powerful paradigm for decision-making problems. While promising, return conditioning is limited to training data labeled with rewards and therefore faces challenges in learning from unsupervised data. In this work, we aim to utilize generalized future conditioning to enable efficient unsupervised pretraining from reward-free and sub-optimal offline data. We propose Pretrained Decision Transformer (PDT), a conceptually simple approach for unsupervised RL pretraining. PDT leverages future trajectory information as a privileged context to predict actions during training. The ability to make decisions based on both present and future factors enhances PDT&#39;s capability for generalization. Besides, this feature can be easily incorporated into a return-conditioned framework for online finetuning, by assigning return values to possible futures and sampling future embeddings based on their respective values. Empirically, PDT outperforms or performs on par with its supervised pretraining counterpart, especially when dealing with sub-optimal data. Further analysis reveals that PDT can extract diverse behaviors from offline data and controllably sample high-return behaviors by online finetuning. Code is available at here. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.16683v1-abstract-full').style.display = 'none'; document.getElementById('2305.16683v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages, 9 figures, ICML 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.14748">arXiv:2305.14748</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2305.14748">pdf</a>, <a href="https://arxiv.org/format/2305.14748">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> </div> </div> <p class="title is-5 mathjax"> Towards Understanding Crypto Money Laundering in Web3 Through the Lenses of Ethereum Heists </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lin%2C+D">Dan Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+J">Jiajing Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+Q">Qishuang Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Yu%2C+Y">Yunmei Yu</a>, <a href="/search/cs?searchtype=author&amp;query=Lin%2C+K">Kaixin Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+Z">Zibin Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+S">Shuo Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.14748v1-abstract-short" style="display: inline;"> With the overall momentum of the blockchain industry, crypto-based crimes are becoming more and more prevalent. After committing a crime, the main goal of cybercriminals is to obfuscate the source of the illicit funds in order to convert them into cash and get away with it. Many studies have analyzed money laundering in the field of the traditional financial sector and blockchain-based Bitcoin. Bu&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.14748v1-abstract-full').style.display = 'inline'; document.getElementById('2305.14748v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.14748v1-abstract-full" style="display: none;"> With the overall momentum of the blockchain industry, crypto-based crimes are becoming more and more prevalent. After committing a crime, the main goal of cybercriminals is to obfuscate the source of the illicit funds in order to convert them into cash and get away with it. Many studies have analyzed money laundering in the field of the traditional financial sector and blockchain-based Bitcoin. But so far, little is known about the characteristics of crypto money laundering in the blockchain-based Web3 ecosystem. To fill this gap, and considering that Ethereum is the largest platform on Web3, in this paper, we systematically study the behavioral characteristics and economic impact of money laundering accounts through the lenses of Ethereum heists. Based on a very small number of tagged accounts of exchange hackers, DeFi exploiters, and scammers, we mine untagged money laundering groups through heuristic transaction tracking methods, to carve out a full picture of security incidents. By analyzing account characteristics and transaction networks, we obtain many interesting findings about crypto money laundering in Web3, observing the escalating money laundering methods such as creating counterfeit tokens and masquerading as speculators. Finally, based on these findings we provide inspiration for anti-money laundering to promote the healthy development of the Web3 ecosystem. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.14748v1-abstract-full').style.display = 'none'; document.getElementById('2305.14748v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Fu%2C+Q&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Fu%2C+Q&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Fu%2C+Q&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Fu%2C+Q&amp;start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Fu%2C+Q&amp;start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

Pages: 1 2 3 4 5 6 7 8 9 10