Search | arXiv e-print repository

<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;50 of 1,413 results for author: <span class="mathjax">Zhou, X</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&amp;query=Zhou%2C+X">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Zhou, X"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Zhou%2C+X&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Zhou, X"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Zhou%2C+X&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Zhou%2C+X&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Zhou%2C+X&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Zhou%2C+X&amp;start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Zhou%2C+X&amp;start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Zhou%2C+X&amp;start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">&hellip;</span></li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.15761">arXiv:2411.15761</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.15761">pdf</a>, <a href="https://arxiv.org/format/2411.15761">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> MambaTrack: Exploiting Dual-Enhancement for Night UAV Tracking </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+C">Chunhui Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+L">Li Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Wen%2C+H">Hao Wen</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xi Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Yanfeng Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.15761v1-abstract-short" style="display: inline;"> Night unmanned aerial vehicle (UAV) tracking is impeded by the challenges of poor illumination, with previous daylight-optimized methods demonstrating suboptimal performance in low-light conditions, limiting the utility of UAV applications. To this end, we propose an efficient mamba-based tracker, leveraging dual enhancement techniques to boost night UAV tracking. 
The mamba-based low-light enhance&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.15761v1-abstract-full').style.display = 'inline'; document.getElementById('2411.15761v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.15761v1-abstract-full" style="display: none;"> Night unmanned aerial vehicle (UAV) tracking is impeded by the challenges of poor illumination, with previous daylight-optimized methods demonstrating suboptimal performance in low-light conditions, limiting the utility of UAV applications. To this end, we propose an efficient mamba-based tracker, leveraging dual enhancement techniques to boost night UAV tracking. The mamba-based low-light enhancer, equipped with an illumination estimator and a damage restorer, achieves global image enhancement while preserving the details and structure of low-light images. Additionally, we advance a cross-modal mamba network to achieve efficient interactive learning between vision and language modalities. Extensive experiments showcase that our method achieves advanced performance and exhibits significantly improved computation and memory efficiency. For instance, our method is 2.8$\times$ faster than CiteTracker and reduces 50.2$\%$ GPU memory. Codes will be made publicly available. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.15761v1-abstract-full').style.display = 'none'; document.getElementById('2411.15761v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.14794">arXiv:2411.14794</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.14794">pdf</a>, <a href="https://arxiv.org/format/2411.14794">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> VideoEspresso: A Large-Scale Chain-of-Thought Dataset for Fine-Grained Video Reasoning via Core Frame Selection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Han%2C+S">Songhao Han</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+W">Wei Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+H">Hairong Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Zhuo%2C+L">Le Zhuo</a>, <a href="/search/cs?searchtype=author&amp;query=Su%2C+X">Xiu Su</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+S">Shifeng Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xu Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Qi%2C+X">Xiaojuan Qi</a>, <a href="/search/cs?searchtype=author&amp;query=Liao%2C+Y">Yue Liao</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+S">Si Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.14794v1-abstract-short" style="display: inline;"> The advancement of Large Vision Language Models (LVLMs) has significantly improved multimodal understanding, yet challenges remain in video reasoning tasks due to the scarcity of high-quality, large-scale datasets. Existing video question-answering (VideoQA) datasets often rely on costly manual annotations with insufficient granularity or automatic construction methods with redundant frame-by-fram&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14794v1-abstract-full').style.display = 'inline'; document.getElementById('2411.14794v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.14794v1-abstract-full" style="display: none;"> The advancement of Large Vision Language Models (LVLMs) has significantly improved multimodal understanding, yet challenges remain in video reasoning tasks due to the scarcity of high-quality, large-scale datasets. Existing video question-answering (VideoQA) datasets often rely on costly manual annotations with insufficient granularity or automatic construction methods with redundant frame-by-frame analysis, limiting their scalability and effectiveness for complex reasoning. To address these challenges, we introduce VideoEspresso, a novel dataset that features VideoQA pairs preserving essential spatial details and temporal coherence, along with multimodal annotations of intermediate reasoning steps. Our construction pipeline employs a semantic-aware method to reduce redundancy, followed by generating QA pairs using GPT-4o. 
We further develop video Chain-of-Thought (CoT) annotations to enrich reasoning processes, guiding GPT-4o in extracting logical relationships from QA pairs and video content. To exploit the potential of high-quality VideoQA pairs, we propose a Hybrid LVLMs Collaboration framework, featuring a Frame Selector and a two-stage instruction fine-tuned reasoning LVLM. This framework adaptively selects core frames and performs CoT reasoning using multimodal evidence. Evaluated on our proposed benchmark with 14 tasks against 9 popular LVLMs, our method outperforms existing baselines on most tasks, demonstrating superior video reasoning capabilities. Our code and dataset will be released at: https://github.com/hshjerry/VideoEspresso <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14794v1-abstract-full').style.display = 'none'; document.getElementById('2411.14794v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages, 14 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.12478">arXiv:2411.12478</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.12478">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Robotic transcatheter tricuspid valve replacement with hybrid enhanced intelligence: a new paradigm and first-in-vivo study </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wang%2C+S">Shuangyi Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Lin%2C+H">Haichuan Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Xie%2C+Y">Yiping Xie</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Z">Ziqi Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+D">Dong Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Tan%2C+L">Longyue Tan</a>, <a href="/search/cs?searchtype=author&amp;query=Hou%2C+X">Xilong Hou</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+C">Chen Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xiao-Hu Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Lin%2C+S">Shengtao Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Pan%2C+F">Fei Pan</a>, <a href="/search/cs?searchtype=author&amp;query=So%2C+K+C">Kent Chak-Yu So</a>, <a href="/search/cs?searchtype=author&amp;query=Hou%2C+Z">Zeng-Guang Hou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.12478v1-abstract-short" style="display: inline;"> Transcatheter tricuspid valve replacement (TTVR) is the latest treatment for tricuspid regurgitation and is in the early stages of clinical adoption. 

3. arXiv:2411.12478 [pdf] (cs.RO, eess.SY)
Robotic transcatheter tricuspid valve replacement with hybrid enhanced intelligence: a new paradigm and first-in-vivo study
Authors: Shuangyi Wang, Haichuan Lin, Yiping Xie, Ziqi Wang, Dong Chen, Longyue Tan, Xilong Hou, Chen Chen, Xiao-Hu Zhou, Shengtao Lin, Fei Pan, Kent Chak-Yu So, Zeng-Guang Hou
Abstract: Transcatheter tricuspid valve replacement (TTVR) is the latest treatment for tricuspid regurgitation and is in the early stages of clinical adoption. Intelligent robotic approaches are expected to overcome the challenges of surgical manipulation and widespread dissemination, but systems and protocols with high clinical utility have not yet been reported. In this study, we propose a complete solution that includes a passive stabilizer, robotic drive, detachable delivery catheter and valve manipulation mechanism. Working towards autonomy, a hybrid augmented intelligence approach based on reinforcement learning, Monte Carlo probabilistic maps and human-robot co-piloted control was introduced. Systematic tests in phantom and first-in-vivo animal experiments were performed to verify that the system design met the clinical requirement. Furthermore, the experimental results confirmed the advantages of co-piloted control over conventional master-slave control in terms of time efficiency, control efficiency, autonomy and stability of operation. In conclusion, this study provides a comprehensive pathway for robotic TTVR and, to our knowledge, completes the first animal study that not only successfully demonstrates the application of hybrid enhanced intelligence in interventional robotics, but also provides a solution with high application value for a cutting-edge procedure.
Submitted 19 November, 2024; originally announced November 2024.

4. arXiv:2411.12364 [pdf, other] (cs.LG)
Ultra-Sparse Memory Network
Authors: Zihao Huang, Qiyang Min, Hongzhi Huang, Defa Zhu, Yutao Zeng, Ran Guo, Xun Zhou
Abstract: It is widely acknowledged that the performance of Transformer models is exponentially related to their number of parameters and computational complexity. While approaches like Mixture of Experts (MoE) decouple parameter count from computational complexity, they still face challenges in inference due to high memory access costs. This work introduces UltraMem, incorporating a large-scale, ultra-sparse memory layer to address these limitations. Our approach significantly reduces inference latency while maintaining model performance. We also investigate the scaling laws of this new architecture, demonstrating that it not only exhibits favorable scaling properties but also outperforms traditional models. In our experiments, we train networks with up to 20 million memory slots. The results show that our method achieves state-of-the-art inference speed and model performance within a given computational budget.
Submitted 19 November, 2024; originally announced November 2024.
Comments: 10 pages, 6 figures
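
Entry 4 describes UltraMem only as a large-scale, ultra-sparse memory layer. For readers unfamiliar with memory layers, the sketch below shows a generic top-k key-value memory read in NumPy: the table can hold millions of slots, but only the few selected ones are read per query. The function name, shapes, and the flat (non-product-key) scoring are illustrative assumptions, not UltraMem's actual design.

```python
import numpy as np

def sparse_memory_read(query, keys, values, top_k=8):
    """Generic sparse memory-layer lookup (illustration, not UltraMem's design):
    score every slot, keep the top-k, and return their softmax-weighted values.
    Only the k selected rows of `values` are read, which is what makes the layer
    "ultra-sparse" at read time. Real large-scale designs also avoid scoring all
    keys (e.g. with product keys); that refinement is omitted here.

    query: (d,), keys: (n_slots, d), values: (n_slots, d_v)
    """
    scores = keys @ query                              # (n_slots,)
    idx = np.argpartition(scores, -top_k)[-top_k:]     # top-k slot indices
    w = np.exp(scores[idx] - scores[idx].max())
    w /= w.sum()                                       # softmax over selected slots
    return w @ values[idx]                             # (d_v,) weighted read-out

# Toy usage: 100k slots, 64-d keys, 128-d values
rng = np.random.default_rng(0)
keys = rng.standard_normal((100_000, 64)).astype(np.float32)
values = rng.standard_normal((100_000, 128)).astype(np.float32)
out = sparse_memory_read(rng.standard_normal(64).astype(np.float32), keys, values)
```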
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 6 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.12338">arXiv:2411.12338</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.12338">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Target Height Estimation Using a Single Acoustic Camera for Compensation in 2D Seabed Mosaicking </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xiaoteng Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Yusheng Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Mizuno%2C+K">Katsunori Mizuno</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.12338v1-abstract-short" style="display: inline;"> This letter proposes a novel approach for compensating target height data in 2D seabed mosaicking for low-visibility underwater perception. Acoustic cameras are effective sensors for sensing the marine environments due to their high-resolution imaging capabilities and robustness to darkness and turbidity. However, the loss of elevation angle during the imaging process results in a lack of target h&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12338v1-abstract-full').style.display = 'inline'; document.getElementById('2411.12338v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.12338v1-abstract-full" style="display: none;"> This letter proposes a novel approach for compensating target height data in 2D seabed mosaicking for low-visibility underwater perception. Acoustic cameras are effective sensors for sensing the marine environments due to their high-resolution imaging capabilities and robustness to darkness and turbidity. However, the loss of elevation angle during the imaging process results in a lack of target height information in the original acoustic camera images, leading to a simplistic 2D representation of the seabed mosaicking. In perceiving cluttered and unexplored marine environments, target height data is crucial for avoiding collisions with marine robots. This study proposes a novel approach for estimating seabed target height using a single acoustic camera and integrates height data into 2D seabed mosaicking to compensate for the missing 3D dimension of seabed targets. Unlike classic methods that model the loss of elevation angle to achieve seabed 3D reconstruction, this study focuses on utilizing available acoustic cast shadow clues and simple sensor motion to quickly estimate target height. The feasibility of our proposal is verified through a water tank experiment and a simulation experiment. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12338v1-abstract-full').style.display = 'none'; document.getElementById('2411.12338v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages,conference</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.11941">arXiv:2411.11941</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.11941">pdf</a>, <a href="https://arxiv.org/format/2411.11941">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> TimeFormer: Capturing Temporal Relationships of Deformable 3D Gaussians for Robust Reconstruction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Jiang%2C+D">DaDong Jiang</a>, <a href="/search/cs?searchtype=author&amp;query=Ke%2C+Z">Zhihui Ke</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xiaobo Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Hou%2C+Z">Zhi Hou</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+X">Xianghui Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Hu%2C+W">Wenbo Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Qiu%2C+T">Tie Qiu</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+C">Chunchao Guo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.11941v1-abstract-short" style="display: inline;"> Dynamic scene reconstruction is a long-term challenge in 3D vision. Recent methods extend 3D Gaussian Splatting to dynamic scenes via additional deformation fields and apply explicit constraints like motion flow to guide the deformation. However, they learn motion changes from individual timestamps independently, making it challenging to reconstruct complex scenes, particularly when dealing with v&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11941v1-abstract-full').style.display = 'inline'; document.getElementById('2411.11941v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.11941v1-abstract-full" style="display: none;"> Dynamic scene reconstruction is a long-term challenge in 3D vision. Recent methods extend 3D Gaussian Splatting to dynamic scenes via additional deformation fields and apply explicit constraints like motion flow to guide the deformation. However, they learn motion changes from individual timestamps independently, making it challenging to reconstruct complex scenes, particularly when dealing with violent movement, extreme-shaped geometries, or reflective surfaces. 

6. arXiv:2411.11941 [pdf, other] (cs.CV)
TimeFormer: Capturing Temporal Relationships of Deformable 3D Gaussians for Robust Reconstruction
Authors: DaDong Jiang, Zhihui Ke, Xiaobo Zhou, Zhi Hou, Xianghui Yang, Wenbo Hu, Tie Qiu, Chunchao Guo
Abstract: Dynamic scene reconstruction is a long-term challenge in 3D vision. Recent methods extend 3D Gaussian Splatting to dynamic scenes via additional deformation fields and apply explicit constraints like motion flow to guide the deformation. However, they learn motion changes from individual timestamps independently, making it challenging to reconstruct complex scenes, particularly when dealing with violent movement, extreme-shaped geometries, or reflective surfaces. To address the above issue, we design a plug-and-play module called TimeFormer that gives existing deformable 3D Gaussian reconstruction methods the ability to implicitly model motion patterns from a learning perspective. Specifically, TimeFormer includes a Cross-Temporal Transformer Encoder, which adaptively learns the temporal relationships of deformable 3D Gaussians. Furthermore, we propose a two-stream optimization strategy that transfers the motion knowledge learned from TimeFormer to the base stream during the training phase. This allows us to remove TimeFormer during inference, thereby preserving the original rendering speed. Extensive experiments on multi-view and monocular dynamic scenes validate the qualitative and quantitative improvements brought by TimeFormer. Project Page: https://patrickddj.github.io/TimeFormer/
Submitted 18 November, 2024; originally announced November 2024.

7. arXiv:2411.11252 [pdf, other] (cs.RO, cs.CV)
DrivingSphere: Building a High-fidelity 4D World for Closed-loop Simulation
Authors: Tianyi Yan, Dongming Wu, Wencheng Han, Junpeng Jiang, Xia Zhou, Kun Zhan, Cheng-zhong Xu, Jianbing Shen
Abstract: Autonomous driving evaluation requires simulation environments that closely replicate actual road conditions, including real-world sensory data and responsive feedback loops. However, many existing simulations only predict waypoints along fixed routes on public datasets or synthetic photorealistic data, i.e., open-loop simulation, which usually lacks the ability to assess dynamic decision-making. While recent efforts in closed-loop simulation offer feedback-driven environments, they cannot process visual sensor inputs or produce outputs that differ from real-world data. To address these challenges, we propose DrivingSphere, a realistic and closed-loop simulation framework. Its core idea is to build a 4D world representation and generate real-life and controllable driving scenarios. Specifically, our framework includes a Dynamic Environment Composition module that constructs a detailed 4D driving world in an occupancy format equipped with static backgrounds and dynamic objects, and a Visual Scene Synthesis module that transforms this data into high-fidelity, multi-view video outputs, ensuring spatial and temporal consistency. By providing a dynamic and realistic simulation environment, DrivingSphere enables comprehensive testing and validation of autonomous driving algorithms, ultimately advancing the development of more reliable autonomous cars. The benchmark will be publicly released.
Submitted 17 November, 2024; originally announced November 2024.
Comments: https://yanty123.github.io/DrivingSphere/

8. arXiv:2411.11082 [pdf] (cs.NE, cs.CV)
STOP: Spatiotemporal Orthogonal Propagation for Weight-Threshold-Leakage Synergistic Training of Deep Spiking Neural Networks
Authors: Haoran Gao, Xichuan Zhou, Yingcheng Lin, Min Tian, Liyuan Liu, Cong Shi
Abstract: The prevalence of artificial intelligence-of-things calls for more energy-efficient edge computing paradigms, such as neuromorphic agents leveraging brain-inspired spiking neural network (SNN) models based on spatiotemporally sparse binary activations. However, the lack of efficient and high-accuracy deep SNN learning algorithms prevents them from practical edge deployments with a strictly bounded cost. In this paper, we propose a spatiotemporal orthogonal propagation (STOP) algorithm to tackle this challenge. Our algorithm enables fully synergistic learning of synaptic weights as well as firing thresholds and leakage factors in spiking neurons to improve SNN accuracy, within a unified temporally-forward trace-based framework that mitigates the huge memory requirement of storing the neural states of all time-steps in the forward pass. Characteristically, the spatially-backward neuronal errors and temporally-forward traces propagate orthogonally to and independently of each other, substantially reducing computational overhead. Our STOP algorithm obtained high recognition accuracies of 99.53%, 94.84%, 74.92%, 98.26% and 77.10% on the MNIST, CIFAR-10, CIFAR-100, DVS-Gesture and DVS-CIFAR10 datasets with adequate SNNs of intermediate scales from LeNet-5 to ResNet-18. Compared with other deep SNN training works, our method is more suitable for edge intelligent scenarios where resources are limited but high-accuracy in-situ learning is desired.
Submitted 17 November, 2024; originally announced November 2024.
Comments: 13 pages (excluding supplementary material), 5 figures

9. arXiv:2411.10438 [pdf, other] (cs.LG, math.OC, stat.ML)
MARS: Unleashing the Power of Variance Reduction for Training Large Models
Authors: Huizhuo Yuan, Yifeng Liu, Shuang Wu, Xun Zhou, Quanquan Gu
Abstract: Training deep neural networks--and more recently, large models--demands efficient and scalable optimizers. Adaptive gradient algorithms like Adam, AdamW, and their variants have been central to this task. Despite the development of numerous variance reduction algorithms in the past decade aimed at accelerating stochastic optimization in both convex and nonconvex settings, variance reduction has not found widespread success in training deep neural networks or large language models. Consequently, it has remained a less favored approach in modern AI. In this paper, to unleash the power of variance reduction for efficient training of large models, we propose a unified optimization framework, MARS (Make vAriance Reduction Shine), which reconciles preconditioned gradient methods with variance reduction via a scaled stochastic recursive momentum technique. Within our framework, we introduce three instances of MARS that leverage preconditioned gradient updates based on AdamW, Lion, and Shampoo, respectively. We also draw a connection between our algorithms and existing optimizers. Experimental results on training GPT-2 models indicate that MARS consistently outperforms AdamW by a large margin.
Submitted 15 November, 2024; originally announced November 2024.
Comments: 23 pages, 7 figures, 6 tables
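
The MARS entry above names the key ingredient, a scaled stochastic recursive momentum feeding a preconditioned (AdamW-style) update, without spelling it out. The NumPy sketch below shows one way such a combination can look; the function name, default hyperparameters, and the exact form of the correction term are illustrative assumptions rather than the authors' algorithm, which operates per parameter tensor inside a training framework.

```python
import numpy as np

def adamw_with_recursive_momentum(grad_fn, x0, steps=1000, lr=3e-4, beta1=0.95,
                                  beta2=0.99, gamma=0.025, eps=1e-8, wd=0.0):
    """Illustrative sketch: a STORM-style variance-reduced (recursive) momentum
    fed into an AdamW-style preconditioned update with decoupled weight decay.
    Not the MARS algorithm itself; names and defaults are placeholders.

    grad_fn(x, batch_id) must return a stochastic gradient at x evaluated on the
    minibatch identified by batch_id, so the same batch can be re-evaluated at
    two different points (the ingredient variance reduction needs).
    """
    x = np.asarray(x0, dtype=float).copy()
    m = np.zeros_like(x)            # first moment (recursive momentum)
    v = np.zeros_like(x)            # second moment (Adam-style)
    prev_x = x.copy()
    for t in range(1, steps + 1):
        batch_id = t                # stand-in for drawing a fresh minibatch
        g = grad_fn(x, batch_id)
        g_prev = grad_fn(prev_x, batch_id)      # same batch, previous iterate
        # variance-reduced estimate: gradient plus a scaled correction built
        # from the gradient difference on the *same* minibatch
        c = g + gamma * (beta1 / (1.0 - beta1)) * (g - g_prev)
        m = beta1 * m + (1.0 - beta1) * c
        v = beta2 * v + (1.0 - beta2) * c * c
        m_hat = m / (1.0 - beta1 ** t)
        v_hat = v / (1.0 - beta2 ** t)
        prev_x = x.copy()
        x = x - lr * (m_hat / (np.sqrt(v_hat) + eps) + wd * x)
    return x

# Toy usage: noisy quadratic with its minimum at the origin
noisy_grad = lambda x, b: 2.0 * x + np.random.default_rng(b).normal(0.0, 0.1, x.shape)
x_final = adamw_with_recursive_momentum(noisy_grad, np.ones(10), steps=2000, lr=1e-2)
```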
Once the model is ported, MLC facilitates inference on&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09625v1-abstract-full').style.display = 'inline'; document.getElementById('2411.09625v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.09625v1-abstract-full" style="display: none;"> We present the MIDInfinite, a web application capable of generating symbolic music using a large-scale generative AI model locally on commodity hardware. Creating this demo involved porting the Anticipatory Music Transformer, a large language model (LLM) pre-trained on the Lakh MIDI dataset, to the Machine Learning Compilation (MLC) framework. Once the model is ported, MLC facilitates inference on a variety of runtimes including C++, mobile, and the browser. We envision that MLC has the potential to bridge the gap between the landscape of increasingly capable music AI models and technology more familiar to music software developers. As a proof of concept, we build a web application that allows users to generate endless streams of multi-instrumental MIDI in the browser, either from scratch or conditioned on a prompt. On commodity hardware (an M3 Macbook Pro), our demo can generate 51 notes per second, which is faster than real-time playback for 72.9% of generations, and increases to 86.3% with 2 seconds of upfront buffering. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09625v1-abstract-full').style.display = 'none'; document.getElementById('2411.09625v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">2 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.09425">arXiv:2411.09425</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.09425">pdf</a>, <a href="https://arxiv.org/format/2411.09425">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> MARM: Unlocking the Future of Recommendation Systems through Memory Augmentation and Scalable Complexity </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lv%2C+X">Xiao Lv</a>, <a href="/search/cs?searchtype=author&amp;query=Cao%2C+J">Jiangxia Cao</a>, <a href="/search/cs?searchtype=author&amp;query=Guan%2C+S">Shijie Guan</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xiaoyou Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Qi%2C+Z">Zhiguang Qi</a>, <a href="/search/cs?searchtype=author&amp;query=Zang%2C+Y">Yaqiang Zang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+M">Ming Li</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+B">Ben Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Gai%2C+K">Kun Gai</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+G">Guorui Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.09425v1-abstract-short" style="display: inline;"> Scaling-law has guided the language model designing for past years, however, it is worth noting that the scaling laws of NLP cannot be directly applied to RecSys due to the following reasons: (1) The amount of training samples and model parameters is typically not the bottleneck for the model. Our recommendation system can generate over 50 billion user samples daily, and such a massive amount of t&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09425v1-abstract-full').style.display = 'inline'; document.getElementById('2411.09425v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.09425v1-abstract-full" style="display: none;"> Scaling-law has guided the language model designing for past years, however, it is worth noting that the scaling laws of NLP cannot be directly applied to RecSys due to the following reasons: (1) The amount of training samples and model parameters is typically not the bottleneck for the model. Our recommendation system can generate over 50 billion user samples daily, and such a massive amount of training data can easily allow our model parameters to exceed 200 billion, surpassing many LLMs (about 100B). (2) To ensure the stability and robustness of the recommendation system, it is essential to control computational complexity FLOPs carefully. Considering the above differences with LLM, we can draw a conclusion that: for a RecSys model, compared to model parameters, the computational complexity FLOPs is a more expensive factor that requires careful control. 
In this paper, we propose our milestone work, MARM (Memory Augmented Recommendation Model), which explores a new cache scaling-laws successfully. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09425v1-abstract-full').style.display = 'none'; document.getElementById('2411.09425v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Work in progress</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> N/A </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.07658">arXiv:2411.07658</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.07658">pdf</a>, <a href="https://arxiv.org/format/2411.07658">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> Advancing Sustainability via Recommender Systems: A Survey </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xin Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+L">Lei Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+H">Honglei Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Y">Yixin Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+X">Xiaoxiong Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+J">Jie Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Shen%2C+Z">Zhiqi Shen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.07658v1-abstract-short" style="display: inline;"> Human behavioral patterns and consumption paradigms have emerged as pivotal determinants in environmental degradation and climate change, with quotidian decisions pertaining to transportation, energy utilization, and resource consumption collectively precipitating substantial ecological impacts. Recommender systems, which generate personalized suggestions based on user preferences and historical i&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07658v1-abstract-full').style.display = 'inline'; document.getElementById('2411.07658v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.07658v1-abstract-full" style="display: none;"> Human behavioral patterns and consumption paradigms have emerged as pivotal determinants in environmental degradation and climate change, with quotidian decisions pertaining to transportation, energy utilization, and resource consumption collectively precipitating substantial ecological impacts. 
Recommender systems, which generate personalized suggestions based on user preferences and historical interaction data, exert considerable influence on individual behavioral trajectories. However, conventional recommender systems predominantly optimize for user engagement and economic metrics, inadvertently neglecting the environmental and societal ramifications of their recommendations, potentially catalyzing over-consumption and reinforcing unsustainable behavioral patterns. Given their instrumental role in shaping user decisions, there exists an imperative need for sustainable recommender systems that incorporate sustainability principles to foster eco-conscious and socially responsible choices. This comprehensive survey addresses this critical research gap by presenting a systematic analysis of sustainable recommender systems. As these systems can simultaneously advance multiple sustainability objectives--including resource conservation, sustainable consumer behavior, and social impact enhancement--examining their implementations across distinct application domains provides a more rigorous analytical framework. Through a methodological analysis of domain-specific implementations encompassing transportation, food, buildings, and auxiliary sectors, we can better elucidate how these systems holistically advance sustainability objectives while addressing sector-specific constraints and opportunities. Moreover, we delineate future research directions for evolving recommender systems beyond sustainability advocacy toward fostering environmental resilience and social consciousness in society. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07658v1-abstract-full').style.display = 'none'; document.getElementById('2411.07658v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">20 pages, 10 figures. 
Working paper: https://github.com/enoche/SusRec</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.07395">arXiv:2411.07395</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.07395">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Data-Centric Learning Framework for Real-Time Detection of Aiming Beam in Fluorescence Lifetime Imaging Guided Surgery </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Hassan%2C+M+A">Mohamed Abul Hassan</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+P">Pu Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xiangnan Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Kraft%2C+L">Lisanne Kraft</a>, <a href="/search/cs?searchtype=author&amp;query=Hadfield%2C+K+T">Kelsey T Hadfield</a>, <a href="/search/cs?searchtype=author&amp;query=Ehrlich%2C+K">Katjana Ehrlich</a>, <a href="/search/cs?searchtype=author&amp;query=Qi%2C+J">Jinyi Qi</a>, <a href="/search/cs?searchtype=author&amp;query=Birkeland%2C+A">Andrew Birkeland</a>, <a href="/search/cs?searchtype=author&amp;query=Marcu%2C+L">Laura Marcu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.07395v1-abstract-short" style="display: inline;"> This study introduces a novel data-centric approach to improve real-time surgical guidance using fiber-based fluorescence lifetime imaging (FLIm). A key aspect of the methodology is the accurate detection of the aiming beam, which is essential for localizing points used to map FLIm measurements onto the tissue region within the surgical field. The primary challenge arises from the complex and vari&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07395v1-abstract-full').style.display = 'inline'; document.getElementById('2411.07395v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.07395v1-abstract-full" style="display: none;"> This study introduces a novel data-centric approach to improve real-time surgical guidance using fiber-based fluorescence lifetime imaging (FLIm). A key aspect of the methodology is the accurate detection of the aiming beam, which is essential for localizing points used to map FLIm measurements onto the tissue region within the surgical field. The primary challenge arises from the complex and variable conditions encountered in the surgical environment, particularly in Transoral Robotic Surgery (TORS). Uneven illumination in the surgical field can cause reflections, reduce contrast, and results in inconsistent color representation, further complicating aiming beam detection. To overcome these challenges, an instance segmentation model was developed using a data-centric training strategy that improves accuracy by minimizing label noise and enhancing detection robustness. The model was evaluated on a dataset comprising 40 in vivo surgical videos, demonstrating a median detection rate of 85%. 
This performance was maintained when the model was integrated in a clinical system, achieving a similar detection rate of 85% during TORS procedures conducted in patients. The system&#39;s computational efficiency, measured at approximately 24 frames per second (FPS), was sufficient for real-time surgical guidance. This study enhances the reliability of FLIm-based aiming beam detection in complex surgical environments, advancing the feasibility of real-time, image-guided interventions for improved surgical precision <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07395v1-abstract-full').style.display = 'none'; document.getElementById('2411.07395v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.07042">arXiv:2411.07042</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.07042">pdf</a>, <a href="https://arxiv.org/format/2411.07042">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> Minion: A Technology Probe for Resolving Value Conflicts through Expert-Driven and User-Driven Strategies in AI Companion Applications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Fan%2C+X">Xianzhe Fan</a>, <a href="/search/cs?searchtype=author&amp;query=Xiao%2C+Q">Qing Xiao</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xuhui Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Su%2C+Y">Yuran Su</a>, <a href="/search/cs?searchtype=author&amp;query=Lu%2C+Z">Zhicong Lu</a>, <a href="/search/cs?searchtype=author&amp;query=Sap%2C+M">Maarten Sap</a>, <a href="/search/cs?searchtype=author&amp;query=Shen%2C+H">Hong Shen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.07042v1-abstract-short" style="display: inline;"> AI companions based on large language models can role-play and converse very naturally. When value conflicts arise between the AI companion and the user, it may offend or upset the user. Yet, little research has examined such conflicts. We first conducted a formative study that analyzed 151 user complaints about conflicts with AI companions, providing design implications for our study. 
Based on th&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07042v1-abstract-full').style.display = 'inline'; document.getElementById('2411.07042v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.07042v1-abstract-full" style="display: none;"> AI companions based on large language models can role-play and converse very naturally. When value conflicts arise between the AI companion and the user, it may offend or upset the user. Yet, little research has examined such conflicts. We first conducted a formative study that analyzed 151 user complaints about conflicts with AI companions, providing design implications for our study. Based on these, we created Minion, a technology probe to help users resolve human-AI value conflicts. Minion applies a user-empowerment intervention method that provides suggestions by combining expert-driven and user-driven conflict resolution strategies. We conducted a technology probe study, creating 40 value conflict scenarios on Character.AI and Talkie. 22 participants completed 274 tasks and successfully resolved conflicts 94.16% of the time. We summarize user responses, preferences, and needs in resolving value conflicts, and propose design implications to reduce conflicts and empower users to resolve them more effectively. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07042v1-abstract-full').style.display = 'none'; document.getElementById('2411.07042v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">18 pages, 5 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.06128">arXiv:2411.06128</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.06128">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Research on reinforcement learning based warehouse robot navigation algorithm in complex warehouse layout </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+K">Keqin Li</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+L">Lipeng Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+J">Jiajing Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Yu%2C+D">Dezhi Yu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xiaofan Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+M">Ming Li</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+C">Congyu Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Z">Zhao Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.06128v1-abstract-short" style="display: inline;"> In this paper, how to efficiently find the optimal path in complex warehouse layout and make real-time decision is a key problem. This paper proposes a new method of Proximal Policy Optimization (PPO) and Dijkstra&#39;s algorithm, Proximal policy-Dijkstra (PP-D). PP-D method realizes efficient strategy learning and real-time decision making through PPO, and uses Dijkstra algorithm to plan the global o&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06128v1-abstract-full').style.display = 'inline'; document.getElementById('2411.06128v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.06128v1-abstract-full" style="display: none;"> In this paper, how to efficiently find the optimal path in complex warehouse layout and make real-time decision is a key problem. This paper proposes a new method of Proximal Policy Optimization (PPO) and Dijkstra&#39;s algorithm, Proximal policy-Dijkstra (PP-D). PP-D method realizes efficient strategy learning and real-time decision making through PPO, and uses Dijkstra algorithm to plan the global optimal path, thus ensuring high navigation accuracy and significantly improving the efficiency of path planning. Specifically, PPO enables robots to quickly adapt and optimize action strategies in dynamic environments through its stable policy updating mechanism. Dijkstra&#39;s algorithm ensures global optimal path planning in static environment. Finally, through the comparison experiment and analysis of the proposed framework with the traditional algorithm, the results show that the PP-D method has significant advantages in improving the accuracy of navigation prediction and enhancing the robustness of the system. 
Especially in complex warehouse layouts, the PP-D method can find the optimal path more accurately and reduce collisions and stagnation. This demonstrates the reliability and effectiveness of the method for robot navigation in complex warehouse layouts. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06128v1-abstract-full').style.display = 'none'; document.getElementById('2411.06128v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.05278">arXiv:2411.05278</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.05278">pdf</a>, <a href="https://arxiv.org/format/2411.05278">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> Integrated Location Sensing and Communication for Ultra-Massive MIMO With Hybrid-Field Beam-Squint Effect </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Gao%2C+Z">Zhen Gao</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xingyu Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Ning%2C+B">Boyu Ning</a>, <a href="/search/cs?searchtype=author&amp;query=Su%2C+Y">Yu Su</a>, <a href="/search/cs?searchtype=author&amp;query=Qin%2C+T">Tong Qin</a>, <a href="/search/cs?searchtype=author&amp;query=Niyato%2C+D">Dusit Niyato</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.05278v1-abstract-short" style="display: inline;"> The advent of ultra-massive multiple-input multiple-output systems holds great promise for next-generation communications, yet their channels exhibit the hybrid far- and near-field beam-squint (HFBS) effect. In this paper, we not only overcome but also harness the HFBS effect to propose an integrated location sensing and communication (ILSC) framework. During the uplink training stage, user terminals&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05278v1-abstract-full').style.display = 'inline'; document.getElementById('2411.05278v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.05278v1-abstract-full" style="display: none;"> The advent of ultra-massive multiple-input multiple-output systems holds great promise for next-generation communications, yet their channels exhibit the hybrid far- and near-field beam-squint (HFBS) effect. In this paper, we not only overcome but also harness the HFBS effect to propose an integrated location sensing and communication (ILSC) framework. During the uplink training stage, user terminals (UTs) transmit reference signals for simultaneous channel estimation and location sensing. 
This stage leverages an elaborately designed hybrid-field projection matrix to overcome the HFBS effect and estimate the channel in compressive manner. Subsequently, the scatterers&#39; locations can be sensed from the spherical wavefront based on the channel estimation results. By treating the sensed scatterers as virtual anchors, we employ a weighted least-squares approach to derive UT&#39; s location. Moreover, we propose an iterative refinement mechanism, which utilizes the accurately estimated time difference of arrival of multipath components to enhance location sensing precision. In the following downlink data transmission stage, we leverage the acquired location information to further optimize the hybrid beamformer, which combines the beam broadening and focusing to mitigate the spectral efficiency degradation resulted from the HFBS effect. Extensive simulation experiments demonstrate that the proposed ILSC scheme has superior location sensing and communication performance than conventional methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05278v1-abstract-full').style.display = 'none'; document.getElementById('2411.05278v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper has been accepted by IEEE JSAC</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.04669">arXiv:2411.04669</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.04669">pdf</a>, <a href="https://arxiv.org/format/2411.04669">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> EffiCANet: Efficient Time Series Forecasting with Convolutional Attention </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xinxing Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Ye%2C+J">Jiaqi Ye</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+S">Shubao Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Jin%2C+M">Ming Jin</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+C">Chengyi Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Wen%2C+Y">Yanlong Wen</a>, <a href="/search/cs?searchtype=author&amp;query=Yuan%2C+X">Xiaojie Yuan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.04669v1-abstract-short" style="display: inline;"> The exponential growth of multivariate time series data from sensor networks in domains like industrial monitoring and smart cities requires efficient and accurate forecasting models. Current deep learning methods often fail to adequately capture long-range dependencies and complex inter-variable relationships, especially under real-time processing constraints. 
These limitations arise as many mode&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04669v1-abstract-full').style.display = 'inline'; document.getElementById('2411.04669v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.04669v1-abstract-full" style="display: none;"> The exponential growth of multivariate time series data from sensor networks in domains like industrial monitoring and smart cities requires efficient and accurate forecasting models. Current deep learning methods often fail to adequately capture long-range dependencies and complex inter-variable relationships, especially under real-time processing constraints. These limitations arise as many models are optimized for either short-term forecasting with limited receptive fields or long-term accuracy at the cost of efficiency. Additionally, dynamic and intricate interactions between variables in real-world data further complicate modeling efforts. To address these limitations, we propose EffiCANet, an Efficient Convolutional Attention Network designed to enhance forecasting accuracy while maintaining computational efficiency. EffiCANet integrates three key components: (1) a Temporal Large-kernel Decomposed Convolution (TLDC) module that captures long-term temporal dependencies while reducing computational overhead; (2) an Inter-Variable Group Convolution (IVGC) module that captures complex and evolving relationships among variables; and (3) a Global Temporal-Variable Attention (GTVA) mechanism that prioritizes critical temporal and inter-variable features. Extensive evaluations across nine benchmark datasets show that EffiCANet achieves the maximum reduction of 10.02% in MAE over state-of-the-art models, while cutting computational costs by 26.2% relative to conventional large-kernel convolution methods, thanks to its efficient decomposition strategy. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04669v1-abstract-full').style.display = 'none'; document.getElementById('2411.04669v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.03884">arXiv:2411.03884</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.03884">pdf</a>, <a href="https://arxiv.org/format/2411.03884">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Polynomial Composition Activations: Unleashing the Dynamics of Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhuo%2C+Z">Zhijian Zhuo</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Ya Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Zeng%2C+Y">Yutao Zeng</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+X">Xiaoqing Li</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xun Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+J">Jinwen Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.03884v1-abstract-short" style="display: inline;"> Transformers have found extensive applications across various domains due to the powerful fitting capabilities. This success can be partially attributed to their inherent nonlinearity. Thus, in addition to the ReLU function employed in the original transformer architecture, researchers have explored alternative modules such as GeLU and SwishGLU to enhance nonlinearity and thereby augment represent&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.03884v1-abstract-full').style.display = 'inline'; document.getElementById('2411.03884v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.03884v1-abstract-full" style="display: none;"> Transformers have found extensive applications across various domains due to the powerful fitting capabilities. This success can be partially attributed to their inherent nonlinearity. Thus, in addition to the ReLU function employed in the original transformer architecture, researchers have explored alternative modules such as GeLU and SwishGLU to enhance nonlinearity and thereby augment representational capacity. In this paper, we propose a novel category of polynomial composition activations (PolyCom), designed to optimize the dynamics of transformers. Theoretically, we provide a comprehensive mathematical analysis of PolyCom, highlighting its enhanced expressivity and efficacy relative to other activation functions. Notably, we demonstrate that networks incorporating PolyCom achieve the $\textbf{optimal approximation rate}$, indicating that PolyCom networks require minimal parameters to approximate general smooth functions in Sobolev spaces. We conduct empirical experiments on the pre-training configurations of large language models (LLMs), including both dense and sparse architectures. 
By substituting conventional activation functions with PolyCom, we enable LLMs to capture higher-order interactions within the data, thus improving performance metrics in terms of accuracy and convergence rates. Extensive experimental results demonstrate the effectiveness of our method, showing substantial improvements over other activation functions. Code is available at https://github.com/BryceZhuo/PolyCom. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.03884v1-abstract-full').style.display = 'none'; document.getElementById('2411.03884v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.02353">arXiv:2411.02353</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.02353">pdf</a>, <a href="https://arxiv.org/format/2411.02353">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Social-RAG: Retrieving from Group Interactions to Socially Ground Proactive AI Generation to Group Preferences </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wang%2C+R">Ruotong Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xinyi Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Qiu%2C+L">Lin Qiu</a>, <a href="/search/cs?searchtype=author&amp;query=Chang%2C+J+C">Joseph Chee Chang</a>, <a href="/search/cs?searchtype=author&amp;query=Bragg%2C+J">Jonathan Bragg</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+A+X">Amy X. Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.02353v1-abstract-short" style="display: inline;"> AI agents are increasingly tasked with making proactive suggestions in online spaces where groups collaborate, but can be unhelpful or even annoying, due to not fitting the group&#39;s preferences or behaving in socially inappropriate ways. Fortunately, group spaces have a rich history of prior social interactions and affordances for social feedback to support creating agents that align to a group&#39;s i&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02353v1-abstract-full').style.display = 'inline'; document.getElementById('2411.02353v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.02353v1-abstract-full" style="display: none;"> AI agents are increasingly tasked with making proactive suggestions in online spaces where groups collaborate, but can be unhelpful or even annoying, due to not fitting the group&#39;s preferences or behaving in socially inappropriate ways. Fortunately, group spaces have a rich history of prior social interactions and affordances for social feedback to support creating agents that align to a group&#39;s interests and norms. 
We present Social-RAG, a workflow for grounding agents to social information about a group, which retrieves from prior group interactions, selects relevant social signals, and then feeds the context into a large language model to generate messages to the group. We implement this into PaperPing, our system that posts academic paper recommendations in group chat, leveraging social signals determined from formative studies with 39 researchers. From a three-month deployment in 18 channels, we observed PaperPing posted relevant messages in groups without disrupting their existing social practices, fostering group common ground. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02353v1-abstract-full').style.display = 'none'; document.getElementById('2411.02353v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.01842">arXiv:2411.01842</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.01842">pdf</a>, <a href="https://arxiv.org/format/2411.01842">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> ElasTST: Towards Robust Varied-Horizon Forecasting with Elastic Time-Series Transformer </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+J">Jiawen Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+S">Shun Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Wen%2C+X">Xumeng Wen</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xiaofang Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Bian%2C+J">Jiang Bian</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jia Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.01842v1-abstract-short" style="display: inline;"> Numerous industrial sectors necessitate models capable of providing robust forecasts across various horizons. Despite the recent strides in crafting specific architectures for time-series forecasting and developing pre-trained universal models, a comprehensive examination of their capability in accommodating varied-horizon forecasting during inference is still lacking. This paper bridges this gap&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.01842v1-abstract-full').style.display = 'inline'; document.getElementById('2411.01842v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.01842v1-abstract-full" style="display: none;"> Numerous industrial sectors necessitate models capable of providing robust forecasts across various horizons. 
Despite the recent strides in crafting specific architectures for time-series forecasting and developing pre-trained universal models, a comprehensive examination of their capability in accommodating varied-horizon forecasting during inference is still lacking. This paper bridges this gap through the design and evaluation of the Elastic Time-Series Transformer (ElasTST). The ElasTST model incorporates a non-autoregressive design with placeholders and structured self-attention masks, warranting future outputs that are invariant to adjustments in inference horizons. A tunable version of rotary position embedding is also integrated into ElasTST to capture time-series-specific periods and enhance adaptability to different horizons. Additionally, ElasTST employs a multi-scale patch design, effectively integrating both fine-grained and coarse-grained information. During the training phase, ElasTST uses a horizon reweighting strategy that approximates the effect of random sampling across multiple horizons with a single fixed horizon setting. Through comprehensive experiments and comparisons with state-of-the-art time-series architectures and contemporary foundation models, we demonstrate the efficacy of ElasTST&#39;s unique design elements. Our findings position ElasTST as a robust solution for the practical necessity of varied-horizon forecasting. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.01842v1-abstract-full').style.display = 'none'; document.getElementById('2411.01842v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NeurIPS 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.01589">arXiv:2411.01589</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.01589">pdf</a>, <a href="https://arxiv.org/format/2411.01589">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> BiT-MamSleep: Bidirectional Temporal Mamba for EEG Sleep Staging </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xinliang Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Han%2C+Y">Yuzhe Han</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Z">Zhisheng Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+C">Chenyu Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Ding%2C+Y">Yi Ding</a>, <a href="/search/cs?searchtype=author&amp;query=Jia%2C+Z">Ziyu Jia</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Y">Yang Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.01589v2-abstract-short" style="display: inline;"> In this paper, we address the challenges in automatic sleep stage classification, particularly the high computational cost, inadequate modeling of bidirectional temporal dependencies, and class imbalance issues faced by Transformer-based models. To address these limitations, we propose BiT-MamSleep, a novel architecture that integrates the Triple-Resolution CNN (TRCNN) for efficient multi-scale fe&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.01589v2-abstract-full').style.display = 'inline'; document.getElementById('2411.01589v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.01589v2-abstract-full" style="display: none;"> In this paper, we address the challenges in automatic sleep stage classification, particularly the high computational cost, inadequate modeling of bidirectional temporal dependencies, and class imbalance issues faced by Transformer-based models. To address these limitations, we propose BiT-MamSleep, a novel architecture that integrates the Triple-Resolution CNN (TRCNN) for efficient multi-scale feature extraction with the Bidirectional Mamba (BiMamba) mechanism, which models both short- and long-term temporal dependencies through bidirectional processing of EEG data. Additionally, BiT-MamSleep incorporates an Adaptive Feature Recalibration (AFR) module and a temporal enhancement block to dynamically refine feature importance, optimizing classification accuracy without increasing computational complexity. To further improve robustness, we apply optimization techniques such as Focal Loss and SMOTE to mitigate class imbalance. 
Extensive experiments on four public datasets demonstrate that BiT-MamSleep significantly outperforms state-of-the-art methods, particularly in handling long EEG sequences and addressing class imbalance, leading to more accurate and scalable sleep stage classification. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.01589v2-abstract-full').style.display = 'none'; document.getElementById('2411.01589v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.01302">arXiv:2411.01302</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.01302">pdf</a>, <a href="https://arxiv.org/format/2411.01302">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Probability">math.PR</span> </div> </div> <p class="title is-5 mathjax"> Regret of exploratory policy improvement and $q$-learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Tang%2C+W">Wenpin Tang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X+Y">Xun Yu Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.01302v1-abstract-short" style="display: inline;"> We study the convergence of $q$-learning and related algorithms introduced by Jia and Zhou (J. Mach. Learn. Res., 24 (2023), 161) for controlled diffusion processes. Under suitable conditions on the growth and regularity of the model parameters, we provide a quantitative error and regret analysis of both the exploratory policy improvement algorithm and the $q$-learning algorithm. </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.01302v1-abstract-full" style="display: none;"> We study the convergence of $q$-learning and related algorithms introduced by Jia and Zhou (J. Mach. Learn. Res., 24 (2023), 161) for controlled diffusion processes. Under suitable conditions on the growth and regularity of the model parameters, we provide a quantitative error and regret analysis of both the exploratory policy improvement algorithm and the $q$-learning algorithm. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.01302v1-abstract-full').style.display = 'none'; document.getElementById('2411.01302v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">23 pages, 1 figure</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.22793">arXiv:2410.22793</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.22793">pdf</a>, <a href="https://arxiv.org/format/2410.22793">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> Less is More: DocString Compression in Code Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yang%2C+G">Guang Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+Y">Yu Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+W">Wei Cheng</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+X">Xiangyu Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+X">Xiang Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Zhuo%2C+T+Y">Terry Yue Zhuo</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+K">Ke Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xin Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Lo%2C+D">David Lo</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+T">Taolue Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.22793v2-abstract-short" style="display: inline;"> The widespread use of Large Language Models (LLMs) in software engineering has intensified the need for improved model and resource efficiency. In particular, for neural code generation, LLMs are used to translate function/method signature and DocString to executable code. DocStrings which capture user re quirements for the code and used as the prompt for LLMs, often contains redundant information&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.22793v2-abstract-full').style.display = 'inline'; document.getElementById('2410.22793v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.22793v2-abstract-full" style="display: none;"> The widespread use of Large Language Models (LLMs) in software engineering has intensified the need for improved model and resource efficiency. In particular, for neural code generation, LLMs are used to translate function/method signature and DocString to executable code. DocStrings which capture user re quirements for the code and used as the prompt for LLMs, often contains redundant information. Recent advancements in prompt compression have shown promising results in Natural Language Processing (NLP), but their applicability to code generation remains uncertain. Our empirical study show that the state-of-the-art prompt compression methods achieve only about 10% reduction, as further reductions would cause significant performance degradation. In our study, we propose a novel compression method, ShortenDoc, dedicated to DocString compression for code generation. 
Our extensive experiments on six code generation datasets, five open-source LLMs (1B to 10B parameters), and one closed-source LLM GPT-4o confirm that ShortenDoc achieves 25-40% compression while preserving the quality of generated code, outperforming other baseline methods at similar compression levels. The benefit of this research is to improve efficiency and reduce the cost while maintaining the quality of the generated code, especially when calling third-party APIs, and is able to reduce the token processing cost by 25-40%. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.22793v2-abstract-full').style.display = 'none'; document.getElementById('2410.22793v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 30 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">UNDER REVIEW</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.22662">arXiv:2410.22662</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.22662">pdf</a>, <a href="https://arxiv.org/format/2410.22662">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> </div> </div> <p class="title is-5 mathjax"> $\textbf{EMOS}$: $\textbf{E}$mbodiment-aware Heterogeneous $\textbf{M}$ulti-robot $\textbf{O}$perating $\textbf{S}$ystem with LLM Agents </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chen%2C+J">Junting Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Yu%2C+C">Checheng Yu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xunzhe Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+T">Tianqi Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Mu%2C+Y">Yao Mu</a>, <a href="/search/cs?searchtype=author&amp;query=Hu%2C+M">Mengkang Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Shao%2C+W">Wenqi Shao</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Yikai Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+G">Guohao Li</a>, <a href="/search/cs?searchtype=author&amp;query=Shao%2C+L">Lin Shao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.22662v1-abstract-short" style="display: inline;"> Heterogeneous multi-robot systems (HMRS) have emerged as a powerful approach for tackling complex tasks that single robots cannot manage alone. Current large-language-model-based multi-agent systems (LLM-based MAS) have shown success in areas like software development and operating systems, but applying these systems to robot control presents unique challenges. 
In particular, the capabilities of e&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.22662v1-abstract-full').style.display = 'inline'; document.getElementById('2410.22662v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.22662v1-abstract-full" style="display: none;"> Heterogeneous multi-robot systems (HMRS) have emerged as a powerful approach for tackling complex tasks that single robots cannot manage alone. Current large-language-model-based multi-agent systems (LLM-based MAS) have shown success in areas like software development and operating systems, but applying these systems to robot control presents unique challenges. In particular, the capabilities of each agent in a multi-robot system are inherently tied to the physical composition of the robots, rather than predefined roles. To address this issue, we introduce a novel multi-agent framework designed to enable effective collaboration among heterogeneous robots with varying embodiments and capabilities, along with a new benchmark named Habitat-MAS. One of our key designs is $\textit{Robot Resume}$: Instead of adopting human-designed role play, we propose a self-prompted approach, where agents comprehend robot URDF files and call robot kinematics tools to generate descriptions of their physics capabilities to guide their behavior in task planning and action execution. The Habitat-MAS benchmark is designed to assess how a multi-agent framework handles tasks that require embodiment-aware reasoning, which includes 1) manipulation, 2) perception, 3) navigation, and 4) comprehensive multi-floor object rearrangement. The experimental results indicate that the robot&#39;s resume and the hierarchical design of our multi-agent system are essential for the effective operation of the heterogeneous multi-robot system within this intricate problem context. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.22662v1-abstract-full').style.display = 'none'; document.getElementById('2410.22662v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages of main content, 3 pages of references, 5 pages of appendix, 7 figures in total</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.2.7; I.2.8; I.2.9; I.2.10 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.20711">arXiv:2410.20711</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.20711">pdf</a>, <a href="https://arxiv.org/format/2410.20711">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> </div> </div> <p class="title is-5 mathjax"> Contextual Representation Anchor Network to Alleviate Selection Bias in Few-Shot Drug Discovery </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+R">Ruifeng Li</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+W">Wei Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xiangxin Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+M">Mingqian Li</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Q">Qiang Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+H">Hongyang Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Lin%2C+X">Xuemin Lin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.20711v2-abstract-short" style="display: inline;"> In the drug discovery process, the low success rate of drug candidate screening often leads to insufficient labeled data, causing the few-shot learning problem in molecular property prediction. Existing methods for few-shot molecular property prediction overlook the sample selection bias, which arises from non-random sample selection in chemical experiments. This bias in data representativeness le&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.20711v2-abstract-full').style.display = 'inline'; document.getElementById('2410.20711v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.20711v2-abstract-full" style="display: none;"> In the drug discovery process, the low success rate of drug candidate screening often leads to insufficient labeled data, causing the few-shot learning problem in molecular property prediction. Existing methods for few-shot molecular property prediction overlook the sample selection bias, which arises from non-random sample selection in chemical experiments. This bias in data representativeness leads to suboptimal performance. To overcome this challenge, we present a novel method named contextual representation anchor Network (CRA), where an anchor refers to a cluster center of the representations of molecules and serves as a bridge to transfer enriched contextual knowledge into molecular representations and enhance their expressiveness. 
CRA introduces a dual-augmentation mechanism that includes context augmentation, which dynamically retrieves analogous unlabeled molecules and captures their task-specific contextual knowledge to enhance the anchors, and anchor augmentation, which leverages the anchors to augment the molecular representations. We evaluate our approach on the MoleculeNet and FS-Mol benchmarks, as well as in domain transfer experiments. The results demonstrate that CRA outperforms the state-of-the-art by 2.60% and 3.28% in AUC and $\Delta$AUC-PR metrics, respectively, and exhibits superior generalization capabilities. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.20711v2-abstract-full').style.display = 'none'; document.getElementById('2410.20711v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages, 7 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 68U07 <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.2.1 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.20688">arXiv:2410.20688</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.20688">pdf</a>, <a href="https://arxiv.org/format/2410.20688">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> </div> </div> <p class="title is-5 mathjax"> Reprogramming Pretrained Target-Specific Diffusion Models for Dual-Target Drug Design </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xiangxin Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Guan%2C+J">Jiaqi Guan</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Y">Yijia Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Peng%2C+X">Xingang Peng</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+L">Liang Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+J">Jianzhu Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.20688v2-abstract-short" style="display: inline;"> Dual-target therapeutic strategies have become a compelling approach and attracted significant attention due to various benefits, such as their potential in overcoming drug resistance in cancer therapy. 
Considering the tremendous success that deep generative models have achieved in structure-based drug design in recent years, we formulate dual-target drug design as a generative task and curate a n&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.20688v2-abstract-full').style.display = 'inline'; document.getElementById('2410.20688v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.20688v2-abstract-full" style="display: none;"> Dual-target therapeutic strategies have become a compelling approach and attracted significant attention due to various benefits, such as their potential in overcoming drug resistance in cancer therapy. Considering the tremendous success that deep generative models have achieved in structure-based drug design in recent years, we formulate dual-target drug design as a generative task and curate a novel dataset of potential target pairs based on synergistic drug combinations. We propose to design dual-target drugs with diffusion models that are trained on single-target protein-ligand complex pairs. Specifically, we align two pockets in 3D space with protein-ligand binding priors and build two complex graphs with shared ligand nodes for SE(3)-equivariant composed message passing, based on which we derive a composed drift in both 3D and categorical probability space in the generative process. Our algorithm can well transfer the knowledge gained in single-target pretraining to dual-target scenarios in a zero-shot manner. We also repurpose linker design methods as strong baselines for this task. Extensive experiments demonstrate the effectiveness of our method compared with various baselines. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.20688v2-abstract-full').style.display = 'none'; document.getElementById('2410.20688v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
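<p class="is-size-7">The abstract above frames dual-target generation as composing two single-target drifts during the reverse diffusion process. Below is a minimal numerical sketch of that composition idea only; the drift functions, the simple averaging rule, and all names are illustrative placeholders, not the paper's SE(3)-equivariant model or released code.</p> <pre><code class="language-python">
import numpy as np

def composed_reverse_step(x, t, drift_a, drift_b, rng, step=0.01, noise_scale=0.1):
    """One schematic reverse-diffusion update for shared ligand coordinates x,
    combining the drifts predicted for two protein pockets.

    drift_a, drift_b: callables mapping (x, t) to an array shaped like x; they
    stand in for two single-target pretrained models (illustrative placeholders).
    """
    # Compose the two single-target drifts; a plain average is used here purely
    # to illustrate the notion of a "composed drift" -- the paper derives its own form.
    drift = 0.5 * (drift_a(x, t) + drift_b(x, t))
    return x + step * drift + noise_scale * np.sqrt(step) * rng.standard_normal(x.shape)

# Toy usage: two quadratic "pockets" pulling the shared ligand toward different centers.
center_a, center_b = np.array([1.0, 0.0, 0.0]), np.array([-1.0, 0.0, 0.0])
da = lambda x, t: center_a - x
db = lambda x, t: center_b - x
rng = np.random.default_rng(0)
x = np.zeros((8, 3))                     # 8 ligand atoms in 3D
for t in range(100):
    x = composed_reverse_step(x, t, da, db, rng)
</code></pre>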
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to NeurIPS 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.20050">arXiv:2410.20050</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.20050">pdf</a>, <a href="https://arxiv.org/format/2410.20050">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> AutoMIR: Effective Zero-Shot Medical Information Retrieval without Relevance Labels </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+L">Lei Li</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+X">Xiangxu Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xiao Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Z">Zheng Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.20050v1-abstract-short" style="display: inline;"> Medical information retrieval (MIR) is essential for retrieving relevant medical knowledge from diverse sources, including electronic health records, scientific literature, and medical databases. However, achieving effective zero-shot dense retrieval in the medical domain poses substantial challenges due to the lack of relevance-labeled data. In this paper, we introduce a novel approach called Sel&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.20050v1-abstract-full').style.display = 'inline'; document.getElementById('2410.20050v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.20050v1-abstract-full" style="display: none;"> Medical information retrieval (MIR) is essential for retrieving relevant medical knowledge from diverse sources, including electronic health records, scientific literature, and medical databases. However, achieving effective zero-shot dense retrieval in the medical domain poses substantial challenges due to the lack of relevance-labeled data. In this paper, we introduce a novel approach called Self-Learning Hypothetical Document Embeddings (SL-HyDE) to tackle this issue. SL-HyDE leverages large language models (LLMs) as generators to generate hypothetical documents based on a given query. These generated documents encapsulate key medical context, guiding a dense retriever in identifying the most relevant documents. The self-learning framework progressively refines both pseudo-document generation and retrieval, utilizing unlabeled medical corpora without requiring any relevance-labeled data. Additionally, we present the Chinese Medical Information Retrieval Benchmark (CMIRB), a comprehensive evaluation framework grounded in real-world medical scenarios, encompassing five tasks and ten datasets. By benchmarking ten models on CMIRB, we establish a rigorous standard for evaluating medical information retrieval systems. 
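<p class="is-size-7">The SL-HyDE retrieval step described above follows the hypothetical-document-embedding pattern: an LLM writes a plausible passage for the query, and that passage, rather than the raw query, is embedded and matched against the corpus. A minimal sketch of that single step follows; the stub generator and the toy hashing-trick embedder are placeholders for the LLM and the dense retriever, and the self-learning refinement loop is omitted.</p> <pre><code class="language-python">
import numpy as np

def embed(text, dim=64):
    # Deterministic-per-run toy embedding (hashing trick); a real system would use a dense retriever.
    vec = np.zeros(dim)
    for token in text.lower().split():
        vec[hash(token) % dim] += 1.0
    n = np.linalg.norm(vec)
    return vec / n if n else vec

def generate_hypothetical_document(query):
    # Placeholder for the LLM generator: it should write a plausible medical passage
    # answering the query, which is then embedded instead of the query itself.
    return "hypothetical passage answering: " + query

def hyde_retrieve(query, corpus, top_k=3):
    pseudo_doc = generate_hypothetical_document(query)
    q_vec = embed(pseudo_doc)
    doc_vecs = np.stack([embed(d) for d in corpus])
    scores = doc_vecs @ q_vec                  # cosine similarity of unit vectors
    order = np.argsort(-scores)[:top_k]
    return [(corpus[i], float(scores[i])) for i in order]

corpus = ["passage about hypertension treatment",
          "passage about knee surgery recovery",
          "passage answering questions about diabetes medication"]
print(hyde_retrieve("which medication lowers blood sugar", corpus))
</code></pre>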
Experimental results demonstrate that SL-HyDE significantly surpasses existing methods in retrieval accuracy while showcasing strong generalization and scalability across various LLM and retriever configurations. CMIRB data and evaluation code are publicly available at: https://github.com/CMIRB-benchmark/CMIRB. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.20050v1-abstract-full').style.display = 'none'; document.getElementById('2410.20050v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">15 pages, 3 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.18666">arXiv:2410.18666</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.18666">pdf</a>, <a href="https://arxiv.org/format/2410.18666">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> DreamClear: High-Capacity Real-World Image Restoration with Privacy-Safe Dataset Curation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Ai%2C+Y">Yuang Ai</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xiaoqiang Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+H">Huaibo Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Han%2C+X">Xiaotian Han</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Z">Zhengyu Chen</a>, <a href="/search/cs?searchtype=author&amp;query=You%2C+Q">Quanzeng You</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+H">Hongxia Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.18666v2-abstract-short" style="display: inline;"> Image restoration (IR) in real-world scenarios presents significant challenges due to the lack of high-capacity models and comprehensive datasets. To tackle these issues, we present a dual strategy: GenIR, an innovative data curation pipeline, and DreamClear, a cutting-edge Diffusion Transformer (DiT)-based image restoration model. GenIR, our pioneering contribution, is a dual-prompt learning pipe&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18666v2-abstract-full').style.display = 'inline'; document.getElementById('2410.18666v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.18666v2-abstract-full" style="display: none;"> Image restoration (IR) in real-world scenarios presents significant challenges due to the lack of high-capacity models and comprehensive datasets. To tackle these issues, we present a dual strategy: GenIR, an innovative data curation pipeline, and DreamClear, a cutting-edge Diffusion Transformer (DiT)-based image restoration model. 
GenIR, our pioneering contribution, is a dual-prompt learning pipeline that overcomes the limitations of existing datasets, which typically comprise only a few thousand images and thus offer limited generalizability for larger models. GenIR streamlines the process into three stages: image-text pair construction, dual-prompt based fine-tuning, and data generation &amp; filtering. This approach circumvents the laborious data crawling process, ensuring copyright compliance and providing a cost-effective, privacy-safe solution for IR dataset construction. The result is a large-scale dataset of one million high-quality images. Our second contribution, DreamClear, is a DiT-based image restoration model. It utilizes the generative priors of text-to-image (T2I) diffusion models and the robust perceptual capabilities of multi-modal large language models (MLLMs) to achieve photorealistic restoration. To boost the model&#39;s adaptability to diverse real-world degradations, we introduce the Mixture of Adaptive Modulator (MoAM). It employs token-wise degradation priors to dynamically integrate various restoration experts, thereby expanding the range of degradations the model can address. Our exhaustive experiments confirm DreamClear&#39;s superior performance, underlining the efficacy of our dual strategy for real-world image restoration. Code and pre-trained models are available at: https://github.com/shallowdream204/DreamClear. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18666v2-abstract-full').style.display = 'none'; document.getElementById('2410.18666v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by NeurIPS 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.16618">arXiv:2410.16618</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.16618">pdf</a>, <a href="https://arxiv.org/format/2410.16618">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> SoK: Dataset Copyright Auditing in Machine Learning Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Du%2C+L">Linkang Du</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xuanru Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+M">Min Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+C">Chusong Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Su%2C+Z">Zhou Su</a>, <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+P">Peng Cheng</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+J">Jiming Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Z">Zhikun Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.16618v1-abstract-short" style="display: inline;"> As the implementation of machine learning (ML) systems becomes more widespread, especially with the introduction of larger ML models, we perceive a spring demand for massive data. However, it inevitably causes infringement and misuse problems with the data, such as using unauthorized online artworks or face images to train ML models. To address this problem, many efforts have been made to audit th&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.16618v1-abstract-full').style.display = 'inline'; document.getElementById('2410.16618v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.16618v1-abstract-full" style="display: none;"> As the implementation of machine learning (ML) systems becomes more widespread, especially with the introduction of larger ML models, we perceive a spring demand for massive data. However, it inevitably causes infringement and misuse problems with the data, such as using unauthorized online artworks or face images to train ML models. To address this problem, many efforts have been made to audit the copyright of the model training dataset. However, existing solutions vary in auditing assumptions and capabilities, making it difficult to compare their strengths and weaknesses. In addition, robustness evaluations usually consider only part of the ML pipeline and hardly reflect the performance of algorithms in real-world ML applications. Thus, it is essential to take a practical deployment perspective on the current dataset copyright auditing tools, examining their effectiveness and limitations. 
Concretely, we categorize dataset copyright auditing research into two prominent strands: intrusive methods and non-intrusive methods, depending on whether they require modifications to the original dataset. Then, we break down the intrusive methods into different watermark injection options and examine the non-intrusive methods using various fingerprints. To summarize our results, we offer detailed reference tables, highlight key points, and pinpoint unresolved issues in the current literature. By combining the pipeline in ML systems and analyzing previous studies, we highlight several future directions to make auditing tools more suitable for real-world copyright protection requirements. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.16618v1-abstract-full').style.display = 'none'; document.getElementById('2410.16618v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear in the IEEE Symposium on Security and Privacy 2025, San Francisco, CA, USA</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.16491">arXiv:2410.16491</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.16491">pdf</a>, <a href="https://arxiv.org/format/2410.16491">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> BIG5-CHAT: Shaping LLM Personalities Through Training on Human-Grounded Data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+W">Wenkai Li</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+J">Jiarui Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+A">Andy Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xuhui Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Diab%2C+M">Mona Diab</a>, <a href="/search/cs?searchtype=author&amp;query=Sap%2C+M">Maarten Sap</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.16491v1-abstract-short" style="display: inline;"> In this work, we tackle the challenge of embedding realistic human personality traits into LLMs. Previous approaches have primarily focused on prompt-based methods that describe the behavior associated with the desired personality traits, suffering from realism and validity issues. 
To address these limitations, we introduce BIG5-CHAT, a large-scale dataset containing 100,000 dialogues designed to&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.16491v1-abstract-full').style.display = 'inline'; document.getElementById('2410.16491v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.16491v1-abstract-full" style="display: none;"> In this work, we tackle the challenge of embedding realistic human personality traits into LLMs. Previous approaches have primarily focused on prompt-based methods that describe the behavior associated with the desired personality traits, suffering from realism and validity issues. To address these limitations, we introduce BIG5-CHAT, a large-scale dataset containing 100,000 dialogues designed to ground models in how humans express their personality in text. Leveraging this dataset, we explore Supervised Fine-Tuning and Direct Preference Optimization as training-based methods to align LLMs more naturally with human personality patterns. Our methods outperform prompting on personality assessments such as BFI and IPIP-NEO, with trait correlations more closely matching human data. Furthermore, our experiments reveal that models trained to exhibit higher conscientiousness, higher agreeableness, lower extraversion, and lower neuroticism display better performance on reasoning tasks, aligning with psychological findings on how these traits impact human cognitive performance. To our knowledge, this work is the first comprehensive study to demonstrate how training-based methods can shape LLM personalities through learning from real human behaviors. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.16491v1-abstract-full').style.display = 'none'; document.getElementById('2410.16491v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.16420">arXiv:2410.16420</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.16420">pdf</a>, <a href="https://arxiv.org/format/2410.16420">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Physics">physics.comp-ph</span> </div> </div> <p class="title is-5 mathjax"> BI-EqNO: Generalized Approximate Bayesian Inference with an Equivariant Neural Operator Framework </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xu-Hui Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Z">Zhuo-Ran Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Xiao%2C+H">Heng Xiao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.16420v1-abstract-short" style="display: inline;"> Bayesian inference offers a robust framework for updating prior beliefs based on new data using Bayes&#39; theorem, but exact inference is often computationally infeasible, necessitating approximate methods. Though widely used, these methods struggle to estimate marginal likelihoods accurately, particularly due to the rigid functional structures of deterministic models like Gaussian processes and the&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.16420v1-abstract-full').style.display = 'inline'; document.getElementById('2410.16420v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.16420v1-abstract-full" style="display: none;"> Bayesian inference offers a robust framework for updating prior beliefs based on new data using Bayes&#39; theorem, but exact inference is often computationally infeasible, necessitating approximate methods. Though widely used, these methods struggle to estimate marginal likelihoods accurately, particularly due to the rigid functional structures of deterministic models like Gaussian processes and the limitations of small sample sizes in stochastic models like the ensemble Kalman method. In this work, we introduce BI-EqNO, an equivariant neural operator framework for generalized approximate Bayesian inference, designed to enhance both deterministic and stochastic approaches. BI-EqNO transforms priors into posteriors conditioned on observation data through data-driven training. The framework is flexible, supporting diverse prior and posterior representations with arbitrary discretizations and varying numbers of observations. Crucially, BI-EqNO&#39;s architecture ensures (1) permutation equivariance between prior and posterior representations, and (2) permutation invariance with respect to observational data. We demonstrate BI-EqNO&#39;s utility through two examples: (1) as a generalized Gaussian process (gGP) for regression, and (2) as an ensemble neural filter (EnNF) for sequential data assimilation. Results show that gGP outperforms traditional Gaussian processes by offering a more flexible representation of covariance functions. 
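<p class="is-size-7">The two symmetry constraints named above can be made concrete with a toy set-style update: observations are pooled with a symmetric statistic, so their ordering does not matter, and the same map is applied to every ensemble member, so permuting members permutes the output accordingly. This is only an illustration of the constraints, not the BI-EqNO architecture; all names are invented for the example.</p> <pre><code class="language-python">
import numpy as np

def toy_posterior_update(prior_members, observations):
    """prior_members: (m, d) ensemble of prior samples; observations: (n, k) data points.
    Pool the observations with a symmetric (mean) statistic so the result is invariant
    to their ordering, then apply the same update to every member so the map is
    equivariant under permutations of the ensemble."""
    obs_summary = observations.mean(axis=0)      # permutation-invariant pooling
    shift = np.tanh(obs_summary).sum()           # toy statistic of the pooled data
    return prior_members + shift                 # identical update applied per member

rng = np.random.default_rng(0)
prior = rng.standard_normal((5, 2))
obs = rng.standard_normal((7, 3))
post = toy_posterior_update(prior, obs)

perm_members = rng.permutation(5)
perm_obs = rng.permutation(7)
# Equivariance: permuting ensemble members permutes the posterior the same way.
assert np.allclose(toy_posterior_update(prior[perm_members], obs), post[perm_members])
# Invariance: reordering the observations leaves the posterior unchanged.
assert np.allclose(toy_posterior_update(prior, obs[perm_obs]), post)
</code></pre>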
Additionally, EnNF not only outperforms the ensemble Kalman filter in small-ensemble settings but also has the potential to function as a &#34;super&#34; ensemble filter, capable of representing and integrating multiple ensemble filters for enhanced assimilation performance. This study highlights BI-EqNO&#39;s versatility and effectiveness, improving Bayesian inference through data-driven training while reducing computational costs across various applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.16420v1-abstract-full').style.display = 'none'; document.getElementById('2410.16420v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.16119">arXiv:2410.16119</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.16119">pdf</a>, <a href="https://arxiv.org/format/2410.16119">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> SeaDAG: Semi-autoregressive Diffusion for Conditional Directed Acyclic Graph Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xinyi Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+X">Xing Li</a>, <a href="/search/cs?searchtype=author&amp;query=Lian%2C+Y">Yingzhao Lian</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Yiwen Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+L">Lei Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Yuan%2C+M">Mingxuan Yuan</a>, <a href="/search/cs?searchtype=author&amp;query=Hao%2C+J">Jianye Hao</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+G">Guangyong Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Heng%2C+P+A">Pheng Ann Heng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.16119v1-abstract-short" style="display: inline;"> We introduce SeaDAG, a semi-autoregressive diffusion model for conditional generation of Directed Acyclic Graphs (DAGs). Considering their inherent layer-wise structure, we simulate layer-wise autoregressive generation by designing different denoising speed for different layers. Unlike conventional autoregressive generation that lacks a global graph structure view, our method maintains a complete&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.16119v1-abstract-full').style.display = 'inline'; document.getElementById('2410.16119v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.16119v1-abstract-full" style="display: none;"> We introduce SeaDAG, a semi-autoregressive diffusion model for conditional generation of Directed Acyclic Graphs (DAGs). 
Considering their inherent layer-wise structure, we simulate layer-wise autoregressive generation by designing different denoising speed for different layers. Unlike conventional autoregressive generation that lacks a global graph structure view, our method maintains a complete graph structure at each diffusion step, enabling operations such as property control that require the full graph structure. Leveraging this capability, we evaluate the DAG properties during training by employing a graph property decoder. We explicitly train the model to learn graph conditioning with a condition loss, which enhances the diffusion model&#39;s capacity to generate graphs that are both realistic and aligned with specified properties. We evaluate our method on two representative conditional DAG generation tasks: (1) circuit generation from truth tables, where precise DAG structures are crucial for realizing circuit functionality, and (2) molecule generation based on quantum properties. Our approach demonstrates promising results, generating high-quality and realistic DAGs that closely align with given conditions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.16119v1-abstract-full').style.display = 'none'; document.getElementById('2410.16119v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.15438">arXiv:2410.15438</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.15438">pdf</a>, <a href="https://arxiv.org/format/2410.15438">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Unveiling and Consulting Core Experts in Retrieval-Augmented MoE-based LLMs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xin Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Nie%2C+P">Ping Nie</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+Y">Yiwen Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Wei%2C+H">Haojie Wei</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Z">Zhanqiu Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Minervini%2C+P">Pasquale Minervini</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+R">Ruotian Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Gui%2C+T">Tao Gui</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Q">Qi Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+X">Xuanjing Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.15438v1-abstract-short" style="display: inline;"> Retrieval-Augmented Generation (RAG) significantly improved the ability of Large Language Models (LLMs) to solve knowledge-intensive tasks. 
While existing research seeks to enhance RAG performance by retrieving higher-quality documents or designing RAG-specific LLMs, the internal mechanisms within LLMs that contribute to the effectiveness of RAG systems remain underexplored. In this paper, we aim&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.15438v1-abstract-full').style.display = 'inline'; document.getElementById('2410.15438v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.15438v1-abstract-full" style="display: none;"> Retrieval-Augmented Generation (RAG) significantly improved the ability of Large Language Models (LLMs) to solve knowledge-intensive tasks. While existing research seeks to enhance RAG performance by retrieving higher-quality documents or designing RAG-specific LLMs, the internal mechanisms within LLMs that contribute to the effectiveness of RAG systems remain underexplored. In this paper, we aim to investigate these internal mechanisms within the popular Mixture-of-Expert (MoE)-based LLMs and demonstrate how to improve RAG by examining expert activations in these LLMs. Our controlled experiments reveal that several core groups of experts are primarily responsible for RAG-related behaviors. The activation of these core experts can signify the model&#39;s inclination towards external/internal knowledge and adjust its behavior. For instance, we identify core experts that can (1) indicate the sufficiency of the model&#39;s internal knowledge, (2) assess the quality of retrieved documents, and (3) enhance the model&#39;s ability to utilize context. Based on these findings, we propose several strategies to enhance RAG&#39;s efficiency and effectiveness through expert activation. Experimental results across various datasets and MoE-based LLMs show the effectiveness of our method. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.15438v1-abstract-full').style.display = 'none'; document.getElementById('2410.15438v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
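<p class="is-size-7">One simplified way to surface the "core experts" described above is to compare expert routing frequencies under two controlled conditions, for example with and without retrieved context, and rank experts by how much their usage shifts. The sketch below uses synthetic routing logs and stands in for, rather than reproduces, the paper's controlled experiments; all names are illustrative.</p> <pre><code class="language-python">
import numpy as np

def expert_activation_freq(router_choices, num_experts):
    """router_choices: (tokens, top_k) int array of expert indices selected per token."""
    counts = np.bincount(router_choices.ravel(), minlength=num_experts)
    return counts / counts.sum()

def rank_core_experts(choices_with_context, choices_without_context, num_experts, top=5):
    freq_ctx = expert_activation_freq(choices_with_context, num_experts)
    freq_no = expert_activation_freq(choices_without_context, num_experts)
    shift = freq_ctx - freq_no           # experts whose usage changes most under retrieval
    order = np.argsort(-np.abs(shift))[:top]
    return [(int(e), float(shift[e])) for e in order]

rng = np.random.default_rng(0)
num_experts = 16
with_ctx = rng.integers(0, num_experts, size=(1000, 2))       # synthetic routing logs
without_ctx = rng.integers(0, num_experts, size=(1000, 2))
print(rank_core_experts(with_ctx, without_ctx, num_experts))
</code></pre>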
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.15164">arXiv:2410.15164</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.15164">pdf</a>, <a href="https://arxiv.org/format/2410.15164">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> SPA-Bench: A Comprehensive Benchmark for SmartPhone Agent Evaluation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chen%2C+J">Jingxuan Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Yuen%2C+D">Derek Yuen</a>, <a href="/search/cs?searchtype=author&amp;query=Xie%2C+B">Bin Xie</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+Y">Yuhao Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+G">Gongwei Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+Z">Zhihao Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Yixing%2C+L">Li Yixing</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xurui Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+W">Weiwen Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+S">Shuai Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+K">Kaiwen Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Shao%2C+R">Rui Shao</a>, <a href="/search/cs?searchtype=author&amp;query=Nie%2C+L">Liqiang Nie</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Yasheng Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Hao%2C+J">Jianye Hao</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+J">Jun Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Shao%2C+K">Kun Shao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.15164v1-abstract-short" style="display: inline;"> Smartphone agents are increasingly important for helping users control devices efficiently, with (Multimodal) Large Language Model (MLLM)-based approaches emerging as key contenders. Fairly comparing these agents is essential but challenging, requiring a varied task scope, the integration of agents with different implementations, and a generalisable evaluation pipeline to assess their strengths an&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.15164v1-abstract-full').style.display = 'inline'; document.getElementById('2410.15164v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.15164v1-abstract-full" style="display: none;"> Smartphone agents are increasingly important for helping users control devices efficiently, with (Multimodal) Large Language Model (MLLM)-based approaches emerging as key contenders. Fairly comparing these agents is essential but challenging, requiring a varied task scope, the integration of agents with different implementations, and a generalisable evaluation pipeline to assess their strengths and weaknesses. In this paper, we present SPA-Bench, a comprehensive SmartPhone Agent Benchmark designed to evaluate (M)LLM-based agents in an interactive environment that simulates real-world conditions. 
SPA-Bench offers three key contributions: (1) A diverse set of tasks covering system and third-party apps in both English and Chinese, focusing on features commonly used in daily routines; (2) A plug-and-play framework enabling real-time agent interaction with Android devices, integrating over ten agents with the flexibility to add more; (3) A novel evaluation pipeline that automatically assesses agent performance across multiple dimensions, encompassing seven metrics related to task completion and resource consumption. Our extensive experiments across tasks and agents reveal challenges like interpreting mobile user interfaces, action grounding, memory retention, and execution costs. We propose future research directions to ease these difficulties, moving closer to real-world smartphone agent applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.15164v1-abstract-full').style.display = 'none'; document.getElementById('2410.15164v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.14668">arXiv:2410.14668</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.14668">pdf</a>, <a href="https://arxiv.org/format/2410.14668">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> MiCEval: Unveiling Multimodal Chain of Thought&#39;s Quality via Image Description and Reasoning Steps </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xiongtao Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=He%2C+J">Jie He</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+L">Lanyu Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jingyu Li</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+H">Haojing Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Guti%C3%A9rrez-Basulto%2C+V">Víctor Gutiérrez-Basulto</a>, <a href="/search/cs?searchtype=author&amp;query=Pan%2C+J+Z">Jeff Z. Pan</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+H">Hanjie Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.14668v3-abstract-short" style="display: inline;"> Multimodal Chain of Thought (MCoT) is a popular prompting strategy for improving the performance of multimodal large language models (MLLMs) across a range of complex reasoning tasks. Despite its popularity, there is a notable absence of automated methods for evaluating the quality of reasoning steps in MCoT.
To address this gap, we propose Multimodal Chain-of-Thought Evaluation (MiCEval), a frame&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.14668v3-abstract-full').style.display = 'inline'; document.getElementById('2410.14668v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.14668v3-abstract-full" style="display: none;"> Multimodal Chain of Thought (MCoT) is a popular prompting strategy for improving the performance of multimodal large language models (MLLMs) across a range of complex reasoning tasks. Despite its popularity, there is a notable absence of automated methods for evaluating the quality of reasoning steps in MCoT. To address this gap, we propose Multimodal Chain-of-Thought Evaluation (MiCEval), a framework designed to assess the correctness of reasoning chains by evaluating the quality of both the description and each reasoning step. The evaluation of the description component focuses on the accuracy of the image descriptions, while the reasoning step evaluates the quality of each step as it is conditionally generated based on the preceding steps. MiCEval is built upon a fine-grained dataset with annotations that rate each step according to correctness, relevance, and informativeness. Extensive experiments on four state-of-the-art MLLMs show that step-wise evaluations using MiCEval align more closely with human judgments compared to existing methods based on cosine similarity or fine-tuning approaches. MiCEval datasets and code can be found in https://github.com/alenai97/MiCEval. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.14668v3-abstract-full').style.display = 'none'; document.getElementById('2410.14668v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">41 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.13607">arXiv:2410.13607</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.13607">pdf</a>, <a href="https://arxiv.org/format/2410.13607">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> DN-4DGS: Denoised Deformable Network with Temporal-Spatial Aggregation for Dynamic Scene Rendering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lu%2C+J">Jiahao Lu</a>, <a href="/search/cs?searchtype=author&amp;query=Deng%2C+J">Jiacheng Deng</a>, <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+R">Ruijie Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Liang%2C+Y">Yanzhe Liang</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+W">Wenfei Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+T">Tianzhu Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xu Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.13607v2-abstract-short" style="display: inline;"> Dynamic scenes rendering is an intriguing yet challenging problem. Although current methods based on NeRF have achieved satisfactory performance, they still can not reach real-time levels. Recently, 3D Gaussian Splatting (3DGS) has garnered researchers attention due to their outstanding rendering quality and real-time speed. Therefore, a new paradigm has been proposed: defining a canonical 3D gaus&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.13607v2-abstract-full').style.display = 'inline'; document.getElementById('2410.13607v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.13607v2-abstract-full" style="display: none;"> Dynamic scenes rendering is an intriguing yet challenging problem. Although current methods based on NeRF have achieved satisfactory performance, they still can not reach real-time levels. Recently, 3D Gaussian Splatting (3DGS) has garnered researchers attention due to their outstanding rendering quality and real-time speed. Therefore, a new paradigm has been proposed: defining a canonical 3D gaussians and deforming it to individual frames in deformable fields. However, since the coordinates of canonical 3D gaussians are filled with noise, which can transfer noise into the deformable fields, and there is currently no method that adequately considers the aggregation of 4D information. Therefore, we propose Denoised Deformable Network with Temporal-Spatial Aggregation for Dynamic Scene Rendering (DN-4DGS). Specifically, a Noise Suppression Strategy is introduced to change the distribution of the coordinates of the canonical 3D gaussians and suppress noise. Additionally, a Decoupled Temporal-Spatial Aggregation Module is designed to aggregate information from adjacent points and frames. 
Extensive experiments on various real-world datasets demonstrate that our method achieves state-of-the-art rendering quality under a real-time level. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.13607v2-abstract-full').style.display = 'none'; document.getElementById('2410.13607v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by NeurIPS 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.12396">arXiv:2410.12396</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.12396">pdf</a>, <a href="https://arxiv.org/format/2410.12396">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Feature Augmentation for Self-supervised Contrastive Learning: A Closer Look </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Y">Yong Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+R">Rui Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+S">Shifeng Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xu Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+S">Shifeng Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+X">Xiaofan Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.12396v1-abstract-short" style="display: inline;"> Self-supervised contrastive learning heavily relies on the view variance brought by data augmentation, so that it can learn a view-invariant pre-trained representation. Beyond increasing the view variance for contrast, this work focuses on improving the diversity of training data, to improve the generalization and robustness of the pre-trained models. To this end, we propose a unified framework to&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.12396v1-abstract-full').style.display = 'inline'; document.getElementById('2410.12396v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.12396v1-abstract-full" style="display: none;"> Self-supervised contrastive learning heavily relies on the view variance brought by data augmentation, so that it can learn a view-invariant pre-trained representation. Beyond increasing the view variance for contrast, this work focuses on improving the diversity of training data, to improve the generalization and robustness of the pre-trained models. To this end, we propose a unified framework to conduct data augmentation in the feature space, known as feature augmentation. 
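<p class="is-size-7">A simple instantiation of the feature augmentation idea introduced above perturbs the encoded features to create additional positive pairs for a contrastive objective. The sketch below pairs a Gaussian feature-space perturbation with a standard InfoNCE loss; the paper investigates several augmentation architectures, and this is only one generic example with invented names.</p> <pre><code class="language-python">
import numpy as np

def feature_augment(z, rng, strength=0.1):
    """Create a feature-space positive: a small Gaussian perturbation of z,
    renormalized to the unit sphere (one of many possible augmentation choices)."""
    z_aug = z + strength * rng.standard_normal(z.shape)
    return z_aug / np.linalg.norm(z_aug, axis=-1, keepdims=True)

def info_nce(z_a, z_b, temperature=0.1):
    """Standard InfoNCE over a batch where z_a[i] and z_b[i] form the positive pair."""
    logits = (z_a @ z_b.T) / temperature
    logits -= logits.max(axis=1, keepdims=True)                 # numerical stability
    log_prob = logits - np.log(np.exp(logits).sum(axis=1, keepdims=True))
    return -np.mean(np.diag(log_prob))

rng = np.random.default_rng(0)
features = rng.standard_normal((32, 128))                       # encoded batch features
features /= np.linalg.norm(features, axis=-1, keepdims=True)
loss = info_nce(features, feature_augment(features, rng))
print(float(loss))
</code></pre>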
This strategy is domain-agnostic, which augments similar features to the original ones and thus improves the data diversity. We perform a systematic investigation of various feature augmentation architectures, the gradient-flow skill, and the relationship between feature augmentation and traditional data augmentation. Our study reveals some practical principles for feature augmentation in self-contrastive learning. By integrating feature augmentation on the instance discrimination or the instance similarity paradigm, we consistently improve the performance of pre-trained feature learning and gain better generalization over the downstream image classification and object detection task. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.12396v1-abstract-full').style.display = 'none'; document.getElementById('2410.12396v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">IJCNN 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.10589">arXiv:2410.10589</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.10589">pdf</a>, <a href="https://arxiv.org/format/2410.10589">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> MoTE: Reconciling Generalization with Specialization for Visual-Language to Video Knowledge Transfer </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+M">Minghao Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Z">Zhengpu Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Hu%2C+M">Mengxian Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Dang%2C+R">Ronghao Dang</a>, <a href="/search/cs?searchtype=author&amp;query=Lin%2C+X">Xiao Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xun Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+C">Chengju Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Q">Qijun Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.10589v1-abstract-short" style="display: inline;"> Transferring visual-language knowledge from large-scale foundation models for video recognition has proved to be effective. To bridge the domain gap, additional parametric modules are added to capture the temporal information. 
However, zero-shot generalization diminishes with the increase in the number of specialized parameters, making existing works a trade-off between zero-shot and close-set per&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.10589v1-abstract-full').style.display = 'inline'; document.getElementById('2410.10589v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.10589v1-abstract-full" style="display: none;"> Transferring visual-language knowledge from large-scale foundation models for video recognition has proved to be effective. To bridge the domain gap, additional parametric modules are added to capture the temporal information. However, zero-shot generalization diminishes with the increase in the number of specialized parameters, making existing works a trade-off between zero-shot and close-set performance. In this paper, we present MoTE, a novel framework that enables generalization and specialization to be balanced in one unified model. Our approach tunes a mixture of temporal experts to learn multiple task views with various degrees of data fitting. To maximally preserve the knowledge of each expert, we propose \emph{Weight Merging Regularization}, which regularizes the merging process of experts in weight space. Additionally with temporal feature modulation to regularize the contribution of temporal feature during test. We achieve a sound balance between zero-shot and close-set video recognition tasks and obtain state-of-the-art or competitive results on various datasets, including Kinetics-400 \&amp; 600, UCF, and HMDB. Code is available at \url{https://github.com/ZMHH-H/MoTE}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.10589v1-abstract-full').style.display = 'none'; document.getElementById('2410.10589v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NeurIPS 2024 Camera Ready</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.08490">arXiv:2410.08490</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.08490">pdf</a>, <a href="https://arxiv.org/format/2410.08490">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> CAS-GAN for Contrast-free Angiography Synthesis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Huang%2C+D">De-Xing Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xiao-Hu Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Gui%2C+M">Mei-Jiang Gui</a>, <a href="/search/cs?searchtype=author&amp;query=Xie%2C+X">Xiao-Liang Xie</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+S">Shi-Qi Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+S">Shuang-Yi Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Hao Li</a>, <a href="/search/cs?searchtype=author&amp;query=Xiang%2C+T">Tian-Yu Xiang</a>, <a href="/search/cs?searchtype=author&amp;query=Hou%2C+Z">Zeng-Guang Hou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.08490v1-abstract-short" style="display: inline;"> Iodinated contrast agents are widely utilized in numerous interventional procedures, yet posing substantial health risks to patients. This paper presents CAS-GAN, a novel GAN framework that serves as a ``virtual contrast agent&#34; to synthesize X-ray angiographies via disentanglement representation learning and vessel semantic guidance, thereby reducing the reliance on iodinated agents during interve&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.08490v1-abstract-full').style.display = 'inline'; document.getElementById('2410.08490v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.08490v1-abstract-full" style="display: none;"> Iodinated contrast agents are widely utilized in numerous interventional procedures, yet posing substantial health risks to patients. This paper presents CAS-GAN, a novel GAN framework that serves as a ``virtual contrast agent&#34; to synthesize X-ray angiographies via disentanglement representation learning and vessel semantic guidance, thereby reducing the reliance on iodinated agents during interventional procedures. Specifically, our approach disentangles X-ray angiographies into background and vessel components, leveraging medical prior knowledge. A specialized predictor then learns to map the interrelationships between these components. Additionally, a vessel semantic-guided generator and a corresponding loss function are introduced to enhance the visual fidelity of generated images. Experimental results on the XCAD dataset demonstrate the state-of-the-art performance of our CAS-GAN, achieving a FID of 5.94 and a MMD of 0.017. 
These promising results highlight CAS-GAN&#39;s potential for clinical applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.08490v1-abstract-full').style.display = 'none'; document.getElementById('2410.08490v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 4 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.08114">arXiv:2410.08114</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.08114">pdf</a>, <a href="https://arxiv.org/format/2410.08114">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Parameter-Efficient Fine-Tuning in Spectral Domain for Point Cloud Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Liang%2C+D">Dingkang Liang</a>, <a href="/search/cs?searchtype=author&amp;query=Feng%2C+T">Tianrui Feng</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xin Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Y">Yumeng Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Zou%2C+Z">Zhikang Zou</a>, <a href="/search/cs?searchtype=author&amp;query=Bai%2C+X">Xiang Bai</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.08114v1-abstract-short" style="display: inline;"> Recently, leveraging pre-training techniques to enhance point cloud models has become a hot research topic. However, existing approaches typically require full fine-tuning of pre-trained models to achieve satisfactory performance on downstream tasks, which is storage-intensive and computationally demanding. To address this issue, we propose a novel Parameter-Efficient Fine-Tuning (PEFT) method fo&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.08114v1-abstract-full').style.display = 'inline'; document.getElementById('2410.08114v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.08114v1-abstract-full" style="display: none;"> Recently, leveraging pre-training techniques to enhance point cloud models has become a hot research topic. However, existing approaches typically require full fine-tuning of pre-trained models to achieve satisfactory performance on downstream tasks, which is storage-intensive and computationally demanding. To address this issue, we propose a novel Parameter-Efficient Fine-Tuning (PEFT) method for point cloud, called PointGST (Point cloud Graph Spectral Tuning). PointGST freezes the pre-trained model and introduces a lightweight, trainable Point Cloud Spectral Adapter (PCSA) to fine-tune parameters in the spectral domain.
The core idea is built on two observations: 1) The inner tokens from frozen models might present confusion in the spatial domain; 2) Task-specific intrinsic information is important for transferring the general knowledge to the downstream task. Specifically, PointGST transfers the point tokens from the spatial domain to the spectral domain, effectively de-correlating confusion among tokens by using orthogonal components to separate them. Moreover, the generated spectral basis involves intrinsic information about the downstream point clouds, enabling more targeted tuning. As a result, PointGST facilitates the efficient transfer of general knowledge to downstream tasks while significantly reducing training costs. Extensive experiments on challenging point cloud datasets across various tasks demonstrate that PointGST not only outperforms its fully fine-tuned counterpart but also significantly reduces trainable parameters, making it a promising solution for efficient point cloud learning. It improves upon a solid baseline by +2.28%, 1.16%, and 2.78%, resulting in 99.48%, 97.76%, and 96.18% on the ScanObjNN OBJ BG, OBJ ONLY, and PB T50 RS datasets, respectively. This advancement establishes a new state-of-the-art, using only 0.67% of the trainable parameters. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.08114v1-abstract-full').style.display = 'none'; document.getElementById('2410.08114v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">The code will be made available at https://github.com/jerryfeng2003/PointGST</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.07739">arXiv:2410.07739</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.07739">pdf</a>, <a href="https://arxiv.org/format/2410.07739">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> SLIM: Let LLM Learn More and Forget Less with Soft LoRA and Identity Mixture </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Han%2C+J">Jiayi Han</a>, <a href="/search/cs?searchtype=author&amp;query=Du%2C+L">Liang Du</a>, <a href="/search/cs?searchtype=author&amp;query=Du%2C+H">Hongwei Du</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xiangguo Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+Y">Yiwen Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+W">Weibo Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Han%2C+D">Donghong Han</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.07739v1-abstract-short" style="display: inline;"> Although many efforts have been made, it is still a challenge to balance the training budget,
downstream performance, and the general capabilities of the LLMs in many applications. Training the whole model for downstream tasks is expensive, and could easily result in catastrophic forgetting. By introducing parameter-efficient fine-tuning (PEFT), the training cost could be reduced, but it still suf&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.07739v1-abstract-full').style.display = 'inline'; document.getElementById('2410.07739v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.07739v1-abstract-full" style="display: none;"> Although many efforts have been made, it is still a challenge to balance the training budget, downstream performance, and the general capabilities of the LLMs in many applications. Training the whole model for downstream tasks is expensive, and could easily result in catastrophic forgetting. By introducing parameter-efficient fine-tuning (PEFT), the training cost could be reduced, but it still suffers from forgetting and limits learning on the downstream tasks. To efficiently fine-tune LLMs with less limitation on their downstream performance while mitigating the forgetting of general capabilities, we propose a novel mixture-of-experts (MoE) framework based on Soft LoRA and Identity Mixture (SLIM), which allows dynamic routing between LoRA adapters and a skip connection, enabling the suppression of forgetting. We adopt weight-yielding with sliding clustering for better out-of-domain discrimination to enhance the routing. We also propose to convert the mixture of low-rank adapters to the model merging formulation and introduce fast dynamic merging of LoRA adapters to keep the general capabilities of the base model. Extensive experiments demonstrate that the proposed SLIM is comparable to the state-of-the-art PEFT approaches on the downstream tasks while achieving the leading performance in mitigating catastrophic forgetting. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.07739v1-abstract-full').style.display = 'none'; document.getElementById('2410.07739v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 pages, 6 figures, 4 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.07135">arXiv:2410.07135</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.07135">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Applications">stat.AP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Causal Inference with Double/Debiased Machine Learning for Evaluating the Health Effects of Multiple Mismeasured Pollutants </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Xu%2C+G">Gang Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xin Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+M">Molin Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+B">Boya Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Jiang%2C+W">Wenhao Jiang</a>, <a href="/search/cs?searchtype=author&amp;query=Laden%2C+F">Francine Laden</a>, <a href="/search/cs?searchtype=author&amp;query=Suh%2C+H+H">Helen H. Suh</a>, <a href="/search/cs?searchtype=author&amp;query=Szpiro%2C+A+A">Adam A. Szpiro</a>, <a href="/search/cs?searchtype=author&amp;query=Spiegelman%2C+D">Donna Spiegelman</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Z">Zuoheng Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.07135v1-abstract-short" style="display: inline;"> One way to quantify exposure to air pollution and its constituents in epidemiologic studies is to use an individual&#39;s nearest monitor. This strategy results in potential inaccuracy in the actual personal exposure, introducing bias in estimating the health effects of air pollution and its constituents, especially when evaluating the causal effects of correlated multi-pollutant constituents measured&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.07135v1-abstract-full').style.display = 'inline'; document.getElementById('2410.07135v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.07135v1-abstract-full" style="display: none;"> One way to quantify exposure to air pollution and its constituents in epidemiologic studies is to use an individual&#39;s nearest monitor. This strategy results in potential inaccuracy in the actual personal exposure, introducing bias in estimating the health effects of air pollution and its constituents, especially when evaluating the causal effects of correlated multi-pollutant constituents measured with correlated error. This paper addresses estimation and inference for the causal effect of one constituent in the presence of other PM2.5 constituents, accounting for measurement error and correlations. 
We used a linear regression calibration model, fitted with generalized estimating equations in an external validation study, and extended a double/debiased machine learning (DML) approach to correct for measurement error and estimate the effect of interest in the main study. We demonstrated that the DML estimator with regression calibration is consistent and derived its asymptotic variance. Simulations showed that the proposed estimator reduced bias and attained nominal coverage probability across most simulation settings. We applied this method to assess the causal effects of PM2.5 constituents on cognitive function in the Nurses&#39; Health Study and identified two PM2.5 constituents, Br and Mn, that showed a negative causal effect on cognitive function after measurement error correction. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.07135v1-abstract-full').style.display = 'none'; document.getElementById('2410.07135v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.06704">arXiv:2410.06704</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.06704">pdf</a>, <a href="https://arxiv.org/format/2410.06704">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> PII-Scope: A Benchmark for Training Data PII Leakage Assessment in LLMs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Nakka%2C+K+K">Krishna Kanth Nakka</a>, <a href="/search/cs?searchtype=author&amp;query=Frikha%2C+A">Ahmed Frikha</a>, <a href="/search/cs?searchtype=author&amp;query=Mendes%2C+R">Ricardo Mendes</a>, <a href="/search/cs?searchtype=author&amp;query=Jiang%2C+X">Xue Jiang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xuebing Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.06704v1-abstract-short" style="display: inline;"> In this work, we introduce PII-Scope, a comprehensive benchmark designed to evaluate state-of-the-art methodologies for PII extraction attacks targeting LLMs across diverse threat settings. Our study provides a deeper understanding of these attacks by uncovering several hyperparameters (e.g., demonstration selection) crucial to their effectiveness. 
Building on this understanding, we extend our stu&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.06704v1-abstract-full').style.display = 'inline'; document.getElementById('2410.06704v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.06704v1-abstract-full" style="display: none;"> In this work, we introduce PII-Scope, a comprehensive benchmark designed to evaluate state-of-the-art methodologies for PII extraction attacks targeting LLMs across diverse threat settings. Our study provides a deeper understanding of these attacks by uncovering several hyperparameters (e.g., demonstration selection) crucial to their effectiveness. Building on this understanding, we extend our study to more realistic attack scenarios, exploring PII attacks that employ advanced adversarial strategies, including repeated and diverse querying, and leveraging iterative learning for continual PII extraction. Through extensive experimentation, our results reveal a notable underestimation of PII leakage in existing single-query attacks. In fact, we show that with sophisticated adversarial capabilities and a limited query budget, PII extraction rates can increase by up to fivefold when targeting the pretrained model. Moreover, we evaluate PII leakage on finetuned models, showing that they are more vulnerable to leakage than pretrained models. Overall, our work establishes a rigorous empirical benchmark for PII extraction attacks in realistic threat scenarios and provides a strong foundation for developing effective mitigation strategies. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.06704v1-abstract-full').style.display = 'none'; document.getElementById('2410.06704v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.06494">arXiv:2410.06494</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.06494">pdf</a>, <a href="https://arxiv.org/format/2410.06494">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Conformal Prediction: A Data Perspective </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xiaofan Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+B">Baiting Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Gui%2C+Y">Yu Gui</a>, <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+L">Lu Cheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.06494v2-abstract-short" style="display: inline;"> Conformal prediction (CP), a distribution-free uncertainty quantification (UQ) framework, reliably provides valid predictive inference for black-box models. CP constructs prediction sets that contain the true output with a specified probability. 
However, the diverse modalities of modern data science, along with increasing data and model complexity, challenge traditional CP methods. These developments hav&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.06494v2-abstract-full').style.display = 'inline'; document.getElementById('2410.06494v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.06494v2-abstract-full" style="display: none;"> Conformal prediction (CP), a distribution-free uncertainty quantification (UQ) framework, reliably provides valid predictive inference for black-box models. CP constructs prediction sets that contain the true output with a specified probability. However, the diverse modalities of modern data science, along with increasing data and model complexity, challenge traditional CP methods. These developments have spurred novel approaches to address evolving scenarios. This survey reviews the foundational concepts of CP and recent advancements from a data-centric perspective, including applications to structured, unstructured, and dynamic data. We also discuss the challenges and opportunities CP faces in large-scale data and models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.06494v2-abstract-full').style.display = 'none'; document.getElementById('2410.06494v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">35 pages, journal, survey</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 68T37 <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> A.1 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.05752">arXiv:2410.05752</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.05752">pdf</a>, <a href="https://arxiv.org/format/2410.05752">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Databases">cs.DB</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Exploring the Meaningfulness of Nearest Neighbor Search in High-Dimensional Space </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Z">Zhonghan Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+R">Ruiyuan Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+X">Xi Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+X">Xiaojun Cheng</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xiaofang Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.05752v1-abstract-short" style="display: inline;"> Dense high dimensional vectors are becoming increasingly vital in fields such as computer vision, machine learning, and large language models (LLMs), serving as standard representations for multimodal data. Now the dimensionality of these vector can exceed several thousands easily. Despite the nearest neighbor search (NNS) over these dense high dimensional vectors have been widely used for retriev&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.05752v1-abstract-full').style.display = 'inline'; document.getElementById('2410.05752v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.05752v1-abstract-full" style="display: none;"> Dense high dimensional vectors are becoming increasingly vital in fields such as computer vision, machine learning, and large language models (LLMs), serving as standard representations for multimodal data. Now the dimensionality of these vector can exceed several thousands easily. Despite the nearest neighbor search (NNS) over these dense high dimensional vectors have been widely used for retrieval augmented generation (RAG) and many other applications, the effectiveness of NNS in such a high-dimensional space remains uncertain, given the possible challenge caused by the &#34;curse of dimensionality.&#34; To address above question, in this paper, we conduct extensive NNS studies with different distance functions, such as $L_1$ distance, $L_2$ distance and angular-distance, across diverse embedding datasets, of varied types, dimensionality and modality. Our aim is to investigate factors influencing the meaningfulness of NNS. 
Our experiments reveal that high-dimensional text embeddings exhibit increased resilience as dimensionality rises to higher levels when compared to random vectors. This resilience suggests that text embeddings are less affected to the &#34;curse of dimensionality,&#34; resulting in more meaningful NNS outcomes for practical use. Additionally, the choice of distance function has minimal impact on the relevance of NNS. Our study shows the effectiveness of the embedding-based data representation method and can offer opportunity for further optimization of dense vector-related applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.05752v1-abstract-full').style.display = 'none'; document.getElementById('2410.05752v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.05090">arXiv:2410.05090</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.05090">pdf</a>, <a href="https://arxiv.org/format/2410.05090">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> HyperINF: Unleashing the HyperPower of the Schulz&#39;s Method for Data Influence Estimation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xinyu Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Fan%2C+S">Simin Fan</a>, <a href="/search/cs?searchtype=author&amp;query=Jaggi%2C+M">Martin Jaggi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.05090v1-abstract-short" style="display: inline;"> Influence functions provide a principled method to assess the contribution of individual training samples to a specific target. Yet, their high computational costs limit their applications on large-scale models and datasets. Existing methods proposed for influence function approximation have significantly reduced the computational overheads. However, they mostly suffer from inaccurate estimation d&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.05090v1-abstract-full').style.display = 'inline'; document.getElementById('2410.05090v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.05090v1-abstract-full" style="display: none;"> Influence functions provide a principled method to assess the contribution of individual training samples to a specific target. Yet, their high computational costs limit their applications on large-scale models and datasets. Existing methods proposed for influence function approximation have significantly reduced the computational overheads. However, they mostly suffer from inaccurate estimation due to the lack of strong convergence guarantees from the algorithm. 
The family of hyperpower methods is well-known for their rigorous convergence guarantees on matrix inverse approximation, while the matrix multiplication operation can involve intractable memory and computation costs on large-scale models. We propose HyperINF, an efficient and accurate influence function approximation method which leverages the hyperpower method, specifically Schulz&#39;s iterative algorithm. To deal with the computation-intensive matrix multiplication, we incorporate the generalized Fisher information (GFIM) as a low-rank approximation of the Hessian matrix, which reduces the memory and computation overheads to constant costs independent of ranks on LoRA-tuned models. We first demonstrate the superior accuracy and stability of HyperINF compared to other baselines through a synthetic convergence simulation for matrix inversion. We further validate the efficacy of HyperINF through extensive real-world data attribution tasks, including mislabeled data detection and data selection for LLM and VLM fine-tuning. On LoRA-tuned models, HyperINF achieves superior downstream performance with minimal memory and computational overhead, while other baselines suffer from significant degradation. Our codebase is available at https://github.com/Blackzxy/HyperINF. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.05090v1-abstract-full').style.display = 'none'; document.getElementById('2410.05090v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.04853">arXiv:2410.04853</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.04853">pdf</a>, <a href="https://arxiv.org/format/2410.04853">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> TimeCNN: Refining Cross-Variable Interaction on Time Point for Time Series Forecasting </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Hu%2C+A">Ao Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+D">Dongkai Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Dai%2C+Y">Yong Dai</a>, <a href="/search/cs?searchtype=author&amp;query=Qi%2C+S">Shiyi Qi</a>, <a href="/search/cs?searchtype=author&amp;query=Wen%2C+L">Liangjian Wen</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+J">Jun Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Z">Zhi Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xun Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+Z">Zenglin Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Duan%2C+J">Jiang Duan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.04853v1-abstract-short" style="display:
inline;"> Time series forecasting is extensively applied across diverse domains. Transformer-based models demonstrate significant potential in modeling cross-time and cross-variable interaction. However, we notice that the cross-variable correlation of multivariate time series demonstrates multifaceted (positive and negative correlations) and dynamic progression over time, which is not well captured by exis&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04853v1-abstract-full').style.display = 'inline'; document.getElementById('2410.04853v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.04853v1-abstract-full" style="display: none;"> Time series forecasting is extensively applied across diverse domains. Transformer-based models demonstrate significant potential in modeling cross-time and cross-variable interaction. However, we notice that the cross-variable correlation of multivariate time series demonstrates multifaceted (positive and negative correlations) and dynamic progression over time, which is not well captured by existing Transformer-based models. To address this issue, we propose a TimeCNN model to refine cross-variable interactions to enhance time series forecasting. Its key innovation is timepoint-independent, where each time point has an independent convolution kernel, allowing each time point to have its independent model to capture relationships among variables. This approach effectively handles both positive and negative correlations and adapts to the evolving nature of variable relationships over time. Extensive experiments conducted on 12 real-world datasets demonstrate that TimeCNN consistently outperforms state-of-the-art models. Notably, our model achieves significant reductions in computational requirements (approximately 60.46%) and parameter count (about 57.50%), while delivering inference speeds 3 to 4 times faster than the benchmark iTransformer model <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04853v1-abstract-full').style.display = 'none'; document.getElementById('2410.04853v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.04681">arXiv:2410.04681</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.04681">pdf</a>, <a href="https://arxiv.org/format/2410.04681">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Coverage Analysis for 3D Indoor Terahertz Communication System Over Fluctuating Two-Ray Fading Channels </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Tang%2C+Z">Zhifeng Tang</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+N">Nan Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Durrani%2C+S">Salman Durrani</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xiangyun Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Juntti%2C+M">Markku Juntti</a>, <a href="/search/cs?searchtype=author&amp;query=Jornet%2C+J+M">Josep Miquel Jornet</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.04681v1-abstract-short" style="display: inline;"> In this paper, we develop a novel analytical framework for a three-dimensional (3D) indoor terahertz (THz) communication system. Our proposed model incorporates more accurate modeling of wall blockages via Manhattan line processes and precise modeling of THz fading channels via a fluctuating two-ray (FTR) channel model. We also account for traditional unique features of THz, such as molecular abso&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04681v1-abstract-full').style.display = 'inline'; document.getElementById('2410.04681v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.04681v1-abstract-full" style="display: none;"> In this paper, we develop a novel analytical framework for a three-dimensional (3D) indoor terahertz (THz) communication system. Our proposed model incorporates more accurate modeling of wall blockages via Manhattan line processes and precise modeling of THz fading channels via a fluctuating two-ray (FTR) channel model. We also account for traditional unique features of THz, such as molecular absorption loss, user blockages, and 3D directional antenna beams. Moreover, we model locations of access points (APs) using a Poisson point process and adopt the nearest line-of-sight AP association strategy. Due to the high penetration loss caused by wall blockages, we consider that a user equipment (UE) and its associated AP and interfering APs are all in the same rectangular area, i.e., a room. Based on the proposed rectangular area model, we evaluate the impact of the UE&#39;s location on the distance to its associated AP. We then develop a tractable method to derive a new expression for the coverage probability by examining the interference from interfering APs and considering the FTR fading experienced by THz communications. Aided by simulation results, we validate our analysis and demonstrate that the UE&#39;s location has a pronounced impact on its coverage probability. 
Additionally, we find that the optimal AP density is determined by both the UE&#39;s location and the room size, which provides valuable insights for meeting the coverage requirements of future THz communication system deployment. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04681v1-abstract-full').style.display = 'none'; document.getElementById('2410.04681v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.03049">arXiv:2410.03049</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.03049">pdf</a>, <a href="https://arxiv.org/format/2410.03049">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Scalable Frame-based Construction of Sociocultural NormBases for Socially-Aware Dialogues </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Qu%2C+S">Shilin Qu</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+W">Weiqing Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xin Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Zhan%2C+H">Haolan Zhan</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Z">Zhuang Li</a>, <a href="/search/cs?searchtype=author&amp;query=Qu%2C+L">Lizhen Qu</a>, <a href="/search/cs?searchtype=author&amp;query=Luo%2C+L">Linhao Luo</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Y">Yuan-Fang Li</a>, <a href="/search/cs?searchtype=author&amp;query=Haffari%2C+G">Gholamreza Haffari</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.03049v1-abstract-short" style="display: inline;"> Sociocultural norms serve as guiding principles for personal conduct in social interactions, emphasizing respect, cooperation, and appropriate behavior, which is able to benefit tasks including conversational information retrieval, contextual information retrieval and retrieval-enhanced machine learning. 
We propose a scalable approach for constructing a Sociocultural Norm (SCN) Base using Large La&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.03049v1-abstract-full').style.display = 'inline'; document.getElementById('2410.03049v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.03049v1-abstract-full" style="display: none;"> Sociocultural norms serve as guiding principles for personal conduct in social interactions, emphasizing respect, cooperation, and appropriate behavior, which is able to benefit tasks including conversational information retrieval, contextual information retrieval and retrieval-enhanced machine learning. We propose a scalable approach for constructing a Sociocultural Norm (SCN) Base using Large Language Models (LLMs) for socially aware dialogues. We construct a comprehensive and publicly accessible Chinese Sociocultural NormBase. Our approach utilizes socially aware dialogues, enriched with contextual frames, as the primary data source to constrain the generation process and reduce hallucinations. This enables the extraction of high-quality and nuanced natural-language norm statements, leveraging the pragmatic implications of utterances with respect to the situation. As real dialogues annotated with gold frames are not readily available, we propose using synthetic data. Our empirical results show: (i) the quality of the SCNs derived from synthetic data is comparable to that from real dialogues annotated with gold frames, and (ii) the quality of the SCNs extracted from real data, annotated with either silver (predicted) or gold frames, surpasses that without the frame annotations. We further show the effectiveness of the extracted SCNs in a RAG-based (Retrieval-Augmented Generation) model to reason about multiple downstream dialogue tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.03049v1-abstract-full').style.display = 'none'; document.getElementById('2410.03049v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> TOMM 2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.01529">arXiv:2410.01529</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.01529">pdf</a>, <a href="https://arxiv.org/format/2410.01529">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Robo-MUTUAL: Robotic Multimodal Task Specification via Unimodal Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jianxiong Li</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Z">Zhihao Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+J">Jinliang Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xiaoai Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+G">Guanming Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+G">Guanglu Song</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Y">Yu Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+J">Jingjing Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Y">Ya-Qin Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Yu%2C+J">Junzhi Yu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhan%2C+X">Xianyuan Zhan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.01529v1-abstract-short" style="display: inline;"> Multimodal task specification is essential for enhanced robotic performance, where \textit{Cross-modality Alignment} enables the robot to holistically understand complex task instructions. Directly annotating multimodal instructions for model training proves impractical, due to the sparsity of paired multimodal data. In this study, we demonstrate that by leveraging unimodal instructions abundant i&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.01529v1-abstract-full').style.display = 'inline'; document.getElementById('2410.01529v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.01529v1-abstract-full" style="display: none;"> Multimodal task specification is essential for enhanced robotic performance, where \textit{Cross-modality Alignment} enables the robot to holistically understand complex task instructions. Directly annotating multimodal instructions for model training proves impractical, due to the sparsity of paired multimodal data. In this study, we demonstrate that by leveraging unimodal instructions abundant in real data, we can effectively teach robots to learn multimodal task specifications. First, we endow the robot with strong \textit{Cross-modality Alignment} capabilities, by pretraining a robotic multimodal encoder using extensive out-of-domain data. 
Then, we employ two Collapse and Corrupt operations to further bridge the remaining modality gap in the learned multimodal representation. This approach projects different modalities of identical task goal as interchangeable representations, thus enabling accurate robotic operations within a well-aligned multimodal latent space. Evaluation across more than 130 tasks and 4000 evaluations on both simulated LIBERO benchmark and real robot platforms showcases the superior capabilities of our proposed framework, demonstrating significant advantage in overcoming data constraints in robotic learning. Website: zh1hao.wang/Robo_MUTUAL <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.01529v1-abstract-full').style.display = 'none'; document.getElementById('2410.01529v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">preprint</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Zhou%2C+X&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Zhou%2C+X&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Zhou%2C+X&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Zhou%2C+X&amp;start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Zhou%2C+X&amp;start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Zhou%2C+X&amp;start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">&hellip;</span></li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 
9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 
21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>
