Search | arXiv e-print repository
Showing 1–50 of 291 results for author: Chu, X

Searching in archive cs. Results are sorted by announcement date (newest first), 50 per page.
1. arXiv:2411.15715 [pdf, other] cs.CE
Task Scheduling for Efficient Inference of Large Language Models on Single Moderate GPU Systems
Authors: Wenxiang Lin, Xinglin Pan, Shaohuai Shi, Xuan Wang, Xiaowen Chu
Abstract: Large language models (LLMs) are known for their high demand on computing resources and memory due to their substantial model size, which leads to inefficient inference on moderate GPU systems. Techniques like quantization or pruning can shrink model sizes but often impair accuracy, making them unsuitable for practical applications. In this work, we introduce \modelname{}, a high-performance inference engine designed to speed up LLM inference without compromising model accuracy. \modelname{} incorporates three innovative methods to increase inference efficiency: 1) model partitioning to allow asynchronous processing of tasks across CPU computation, GPU computation, and CPU-GPU communication, 2) an adaptive partition algorithm to optimize the use of CPU, GPU, and PCIe communication capabilities, and 3) a token assignment strategy to handle diverse prompt and generation tasks during LLM inference. Comprehensive experiments were conducted with various LLMs such as Mixtral, LLaMA-2, Qwen, and PhiMoE across three test environments featuring different CPUs and GPUs. The experimental findings demonstrate that \modelname{} achieves speeds between $1.11\times$ and $1.80\times$ faster in decoding and between $1.69\times$ and $6.33\times$ faster in pre-filling, leading to an overall speedup ranging from $1.25\times$ to $2.04\times$ compared to state-of-the-art solutions, llama.cpp and Fiddler.
Submitted 23 November, 2024; originally announced November 2024.
2. arXiv:2411.14871 [pdf, other] cs.CV, cs.AI, cs.CL, cs.LG
Prioritize Denoising Steps on Diffusion Model Preference Alignment via Explicit Denoised Distribution Estimation
Authors: Dingyuan Shi, Yong Wang, Hangyu Li, Xiangxiang Chu
Abstract: Diffusion models have shown remarkable success in text-to-image generation, making alignment methods for these models increasingly important. A key challenge is the sparsity of preference labels, which are typically available only at the terminal of denoising trajectories. This raises the issue of how to assign credit across denoising steps based on these sparse labels. In this paper, we propose Denoised Distribution Estimation (DDE), a novel method for credit assignment. Unlike previous approaches that rely on auxiliary models or hand-crafted schemes, DDE derives its strategy more explicitly. The proposed DDE directly estimates the terminal denoised distribution from the perspective of each step. It is equipped with two estimation strategies and capable of representing the entire denoising trajectory with a single model inference. Theoretically and empirically, we show that DDE prioritizes optimizing the middle part of the denoising trajectory, resulting in a novel and effective credit assignment scheme. Extensive experiments demonstrate that our approach achieves superior performance, both quantitatively and qualitatively.
Submitted 22 November, 2024; originally announced November 2024.

3. arXiv:2411.14062 [pdf, other] cs.CV, cs.AI, cs.CL, cs.LG
MMGenBench: Evaluating the Limits of LMMs from the Text-to-Image Generation Perspective
Authors: Hailang Huang, Yong Wang, Zixuan Huang, Huaqiu Li, Tongwen Huang, Xiangxiang Chu, Richong Zhang
Abstract: Large Multimodal Models (LMMs) have demonstrated remarkable capabilities. While existing benchmarks for evaluating LMMs mainly focus on image comprehension, few works evaluate them from the image generation perspective. To address this issue, we propose a straightforward automated evaluation pipeline. Specifically, this pipeline requires LMMs to generate an image-prompt from a given input image. Subsequently, it employs text-to-image generative models to create a new image based on these generated prompts. Finally, we evaluate the performance of LMMs by comparing the original image with the generated one. Furthermore, we introduce MMGenBench-Test, a comprehensive benchmark developed to evaluate LMMs across 13 distinct image patterns, and MMGenBench-Domain, targeting the performance evaluation of LMMs within the generative image domain. A thorough evaluation involving over 50 popular LMMs demonstrates the effectiveness and reliability of both the pipeline and the benchmark. Our observations indicate that numerous LMMs excelling in existing benchmarks fail to adequately complete basic tasks related to image understanding and description. This finding highlights the substantial potential for performance improvement in current LMMs and suggests avenues for future model optimization. Concurrently, our pipeline facilitates the efficient assessment of LMM performance across diverse domains by using solely image inputs.
Submitted 21 November, 2024; originally announced November 2024.
Comments: This project is available at: https://github.com/lerogo/MMGenBench

4. arXiv:2411.13602 [pdf] eess.IV, cs.AI, cs.CV
Large-scale cross-modality pretrained model enhances cardiovascular state estimation and cardiomyopathy detection from electrocardiograms: An AI system development and multi-center validation study
Authors: Zhengyao Ding, Yujian Hu, Youyao Xu, Chengchen Zhao, Ziyu Li, Yiheng Mao, Haitao Li, Qian Li, Jing Wang, Yue Chen, Mengjia Chen, Longbo Wang, Xuesen Chu, Weichao Pan, Ziyi Liu, Fei Wu, Hongkun Zhang, Ting Chen, Zhengxing Huang
Abstract: Cardiovascular diseases (CVDs) present significant challenges for early and accurate diagnosis. While cardiac magnetic resonance imaging (CMR) is the gold standard for assessing cardiac function and diagnosing CVDs, its high cost and technical complexity limit accessibility. In contrast, electrocardiography (ECG) offers promise for large-scale early screening. This study introduces CardiacNets, an innovative model that enhances ECG analysis by leveraging the diagnostic strengths of CMR through cross-modal contrastive learning and generative pretraining. CardiacNets serves two primary functions: (1) it evaluates detailed cardiac function indicators and screens for potential CVDs, including coronary artery disease, cardiomyopathy, pericarditis, heart failure and pulmonary hypertension, using ECG input; and (2) it enhances interpretability by generating high-quality CMR images from ECG data. We train and validate the proposed CardiacNets on two large-scale public datasets (the UK Biobank with 41,519 individuals and the MIMIC-IV-ECG comprising 501,172 samples) as well as three private datasets (FAHZU with 410 individuals, SAHZU with 464 individuals, and QPH with 338 individuals), and the findings demonstrate that CardiacNets consistently outperforms traditional ECG-only models, substantially improving screening accuracy. Furthermore, the generated CMR images provide valuable diagnostic support for physicians of all experience levels. This proof-of-concept study highlights how ECG can facilitate cross-modal insights into cardiac function assessment, paving the way for enhanced CVD screening and diagnosis at a population level.
Submitted 19 November, 2024; originally announced November 2024.
Comments: 23 pages, 8 figures

5. arXiv:2411.09007 [pdf, other] cs.CV
Scale Contrastive Learning with Selective Attentions for Blind Image Quality Assessment
Authors: Zihao Huang, Xudong Li, Bohan Fu, Xiaohui Chu, Ke Li, Yunhang Shen, Yan Zhang
Abstract: Blind image quality assessment (BIQA) serves as a fundamental task in computer vision, yet it often fails to consistently align with human subjective perception. Recent advances show that multi-scale evaluation strategies are promising due to their ability to replicate the hierarchical structure of human vision. However, the effectiveness of these strategies is limited by a lack of understanding of how different image scales influence perceived quality. This paper addresses two primary challenges: the significant redundancy of information across different scales, and the confusion caused by combining features from these scales, which may vary widely in quality. To this end, a new multi-scale BIQA framework is proposed, namely the Contrast-Constrained Scale-Focused IQA Framework (CSFIQA). CSFIQA features a selective focus attention mechanism to minimize information redundancy and highlight critical quality-related information. Additionally, CSFIQA includes a scale-level contrastive learning module equipped with a noise sample matching mechanism to identify quality discrepancies across the same image content at different scales. By exploring the intrinsic relationship between image scales and perceived quality, the proposed CSFIQA achieves leading performance on eight benchmark datasets, e.g., achieving SRCC values of 0.967 (versus 0.947 in CSIQ) and 0.905 (versus 0.876 in LIVEC).
Submitted 13 November, 2024; originally announced November 2024.
6. arXiv:2410.23855 [pdf, other] cs.LG, cs.AI, cs.SI
RAGraph: A General Retrieval-Augmented Graph Learning Framework
Authors: Xinke Jiang, Rihong Qiu, Yongxin Xu, Wentao Zhang, Yichen Zhu, Ruizhe Zhang, Yuchen Fang, Xu Chu, Junfeng Zhao, Yasha Wang
Abstract: Graph Neural Networks (GNNs) have become essential in interpreting relational data across various domains, yet they often struggle to generalize to unseen graph data that differs markedly from training instances. In this paper, we introduce a novel framework called General Retrieval-Augmented Graph Learning (RAGraph), which brings external graph data into the general graph foundation model to improve model generalization in unseen scenarios. At the top of our framework is a toy graph vector library that we established, which captures key attributes, such as features and task-specific label information. During inference, RAGraph adeptly retrieves similar toy graphs based on key similarities in downstream tasks, integrating the retrieved data to enrich the learning context via the message-passing prompting mechanism. Our extensive experimental evaluations demonstrate that RAGraph significantly outperforms state-of-the-art graph learning methods in multiple tasks such as node classification, link prediction, and graph classification across both dynamic and static datasets. Furthermore, extensive testing confirms that RAGraph consistently maintains high performance without the need for task-specific fine-tuning, highlighting its adaptability, robustness, and broad applicability.
Submitted 31 October, 2024; originally announced October 2024.
Comments: NeurIPS 2024

7. arXiv:2410.21758 [pdf, other] cs.CV, cs.RO
DOFS: A Real-world 3D Deformable Object Dataset with Full Spatial Information for Dynamics Model Learning
Authors: Zhen Zhang, Xiangyu Chu, Yunxi Tang, K. W. Samuel Au
Abstract: This work proposes DOFS, a pilot dataset of 3D deformable objects (DOs) (e.g., elasto-plastic objects) with full spatial information (i.e., top, side, and bottom information) collected using a novel and low-cost data collection platform with a transparent operating plane. The dataset consists of active manipulation actions, multi-view RGB-D images, well-registered point clouds, 3D deformed meshes, and 3D occupancy with semantics, using a pinching strategy with a two-parallel-finger gripper. In addition, we trained a neural network with the down-sampled 3D occupancy and action as input to model the dynamics of an elasto-plastic object. Our dataset and all CADs of the data collection system will be released soon on our website.
Submitted 29 October, 2024; originally announced October 2024.
Comments: 5 pages, 6 figures, 2024 CoRL Workshop on Learning Robot Fine and Dexterous Manipulation: Perception and Control

8. arXiv:2410.20380 [pdf, other] cs.LG, cs.AI, cs.DC, cs.NI
FuseFL: One-Shot Federated Learning through the Lens of Causality with Progressive Model Fusion
Authors: Zhenheng Tang, Yonggang Zhang, Peijie Dong, Yiu-ming Cheung, Amelie Chi Zhou, Bo Han, Xiaowen Chu
Abstract: One-shot Federated Learning (OFL) significantly reduces communication costs in FL by aggregating trained models only once. However, the performance of advanced OFL methods is far behind that of normal FL. In this work, we provide a causal view to find that this performance drop of OFL methods comes from the isolation problem, which means that locally trained, isolated models in OFL may easily fit spurious correlations due to data heterogeneity. From the causal perspective, we observe that this spurious fitting can be alleviated by augmenting intermediate features from other clients. Built upon our observation, we propose a novel learning approach to endow OFL with superb performance and low communication and storage costs, termed FuseFL. Specifically, FuseFL decomposes neural networks into several blocks, and progressively trains and fuses each block in a bottom-up manner for feature augmentation, introducing no additional communication costs. Comprehensive experiments demonstrate that FuseFL outperforms existing OFL and ensemble FL by a significant margin. We conduct comprehensive experiments to show that FuseFL supports high scalability of clients, heterogeneous model training, and low memory costs. Our work is the first attempt to use causality to analyze and alleviate the data heterogeneity of OFL.
Submitted 27 October, 2024; originally announced October 2024.
9. arXiv:2410.18785 [pdf, other] cs.AI
Should We Really Edit Language Models? On the Evaluation of Edited Language Models
Authors: Qi Li, Xiang Liu, Zhenheng Tang, Peijie Dong, Zeyu Li, Xinglin Pan, Xiaowen Chu
Abstract: Model editing has become an increasingly popular alternative for efficiently updating knowledge within language models. Current methods mainly focus on reliability, generalization, and locality, with many methods excelling across these criteria. Some recent works disclose the pitfalls of these editing methods, such as knowledge distortion or conflict. However, the general abilities of post-edited language models remain unexplored. In this paper, we perform a comprehensive evaluation on various editing methods and different language models, and have the following findings. (1) Existing editing methods lead to inevitable performance deterioration on general benchmarks, indicating that existing editing methods maintain the general abilities of the model within only a few dozen edits. When the number of edits is slightly larger, the intrinsic knowledge structure of the model is disrupted or even completely damaged. (2) Instruction-tuned models are more robust to editing, showing less performance drop on general knowledge after editing. (3) Language models at large scale are more resistant to editing than small models. (4) The safety of the edited model is significantly weakened, even for those safety-aligned models. Our findings indicate that current editing methods are only suitable for small-scale knowledge updates within language models, which motivates further research on more practical and reliable editing methods. The details of code and reproduction can be found in https://github.com/lqinfdim/EditingEvaluation.
Submitted 24 October, 2024; originally announced October 2024.
Comments: NeurIPS 2024 https://github.com/lqinfdim/EditingEvaluation

10. arXiv:2410.17954 [pdf, other] cs.AI, cs.CL
ExpertFlow: Optimized Expert Activation and Token Allocation for Efficient Mixture-of-Experts Inference
Authors: Xin He, Shunkang Zhang, Yuxin Wang, Haiyan Yin, Zihao Zeng, Shaohuai Shi, Zhenheng Tang, Xiaowen Chu, Ivor Tsang, Ong Yew Soon
Abstract: Sparse Mixture of Experts (MoE) models, while outperforming dense Large Language Models (LLMs) in terms of performance, face significant deployment challenges during inference due to their high memory demands. Existing offloading techniques, which involve swapping activated and idle experts between the GPU and CPU, often suffer from rigid expert caching mechanisms. These mechanisms fail to adapt to dynamic routing, leading to inefficient cache utilization, or incur prohibitive costs for prediction training. To tackle these inference-specific challenges, we introduce ExpertFlow, a comprehensive system specifically designed to enhance inference efficiency by accommodating flexible routing and enabling efficient expert scheduling between CPU and GPU. This reduces overhead and boosts system performance. Central to our approach is a predictive routing path-based offloading mechanism that utilizes a lightweight predictor to accurately forecast routing paths before computation begins. This proactive strategy allows for real-time error correction in expert caching, significantly increasing cache hit ratios and reducing the frequency of expert transfers, thereby minimizing I/O overhead. Additionally, we implement a dynamic token scheduling strategy that optimizes MoE inference by rearranging input tokens across different batches. This method not only reduces the number of activated experts per batch but also improves computational efficiency. Our extensive experiments demonstrate that ExpertFlow achieves up to 93.72% GPU memory savings and enhances inference speed by 2 to 10 times compared to baseline methods, highlighting its effectiveness and utility as a robust solution for resource-constrained inference scenarios.
Submitted 23 October, 2024; originally announced October 2024.
Comments: Mixture-of-Experts, Inference, Offloading
This phenomenon exists in both single-modality (text-only or image-only) and mixed-modality (image-t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.16983v1-abstract-full').style.display = 'inline'; document.getElementById('2410.16983v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.16983v1-abstract-full" style="display: none;"> Multimodal Large Language Models (MLLMs) utilize multimodal contexts consisting of text, images, or videos to solve various multimodal tasks. However, we find that changing the order of multimodal input can cause the model's performance to fluctuate between advanced performance and random guessing. This phenomenon exists in both single-modality (text-only or image-only) and mixed-modality (image-text-pair) contexts. Furthermore, we demonstrate that popular MLLMs pay special attention to certain multimodal context positions, particularly the beginning and end. Leveraging this special attention, we place key video frames and important image/text content in special positions within the context and submit them to the MLLM for inference. This method results in average performance gains of 14.7% for video-caption matching and 17.8% for visual question answering tasks. Additionally, we propose a new metric, Position-Invariant Accuracy (PIA), to address order bias in MLLM evaluation. Our research findings contribute to a better understanding of Multi-Modal In-Context Learning (MMICL) and provide practical strategies for enhancing MLLM performance without increasing computational costs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.16983v1-abstract-full').style.display = 'none'; document.getElementById('2410.16983v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
arXiv:2410.12854 (https://arxiv.org/abs/2410.12854) [pdf, other] cs.CL, cs.AI
TPO: Aligning Large Language Models with Multi-branch & Multi-step Preference Trees
Authors: Weibin Liao, Xu Chu, Yasha Wang
Abstract: In the domain of complex reasoning tasks, such as mathematical reasoning, recent advancements have proposed the use of Direct Preference Optimization (DPO) to suppress output of dispreferred responses, thereby enhancing the long-chain reasoning capabilities of large language models (LLMs). To this end, these studies employed LLMs to generate preference trees via Tree-of-thoughts (ToT) and sample the paired preference responses required by the DPO algorithm. However, the DPO algorithm, based on binary preference optimization, is unable to learn the multiple responses with varying degrees of preference or dispreference provided by the preference trees, resulting in incomplete preference learning. In this work, we introduce Tree Preference Optimization (TPO), which does not sample paired preference responses from the preference tree; instead, it directly learns from the entire preference tree during fine-tuning. Specifically, TPO formulates language model alignment as a Preference List Ranking problem, where the policy can potentially learn more effectively from a ranked preference list of responses given the prompt. In addition, to further assist LLMs in identifying discriminative steps within long-chain reasoning and to increase the relative reward margin in the preference list, TPO utilizes an Adaptive Step Reward to adjust the reward value of each step in the trajectory for fine-grained preference optimization. We carry out extensive experiments on mathematical reasoning tasks to evaluate TPO. The experimental results indicate that TPO consistently outperforms DPO across three public large language models on four datasets.
Submitted 10 October, 2024; originally announced October 2024.
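The abstract casts alignment as Preference List Ranking but does not state the loss; a standard listwise choice is the Plackett-Luce negative log-likelihood over policy scores of reward-ranked responses, sketched below as an assumption, not as TPO's actual objective.

    # Sketch: listwise ranking loss; scores_ranked are policy log-scores for
    # the responses, ordered best-first by the preference tree's rewards.
    import math

    def plackett_luce_nll(scores_ranked):
        nll = 0.0
        for i in range(len(scores_ranked)):
            log_denom = math.log(sum(math.exp(s) for s in scores_ranked[i:]))
            nll += log_denom - scores_ranked[i]
        return nll

    # A correctly ordered list yields a lower loss than a reversed one:
    assert plackett_luce_nll([2.0, 1.0, 0.0]) < plackett_luce_nll([0.0, 1.0, 2.0])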
arXiv:2410.12707 (https://arxiv.org/abs/2410.12707) [pdf, other] cs.DC, cs.AI, cs.LG
FusionLLM: A Decentralized LLM Training System on Geo-distributed GPUs with Adaptive Compression
Authors: Zhenheng Tang, Xueze Kang, Yiming Yin, Xinglin Pan, Yuxin Wang, Xin He, Qiang Wang, Rongfei Zeng, Kaiyong Zhao, Shaohuai Shi, Amelie Chi Zhou, Bo Li, Bingsheng He, Xiaowen Chu
Abstract: To alleviate hardware scarcity in training large deep neural networks (DNNs), particularly large language models (LLMs), we present FusionLLM, a decentralized training system designed and implemented for training DNNs using geo-distributed GPUs across different computing clusters or individual devices.
Decentralized training faces significant challenges regarding system design and efficiency, including: 1) the need for remote automatic differentiation (RAD), 2) support for flexible model definitions and heterogeneous software, 3) heterogeneous hardware leading to low resource utilization or the straggler problem, and 4) slow network communication. To address these challenges, in the system design, we represent the model as a directed acyclic graph of operators (OP-DAG). Each node in the DAG represents an operator in the DNN, while each edge represents the data dependency between operators. Based on this design, 1) users are allowed to customize any DNN without worrying about low-level operator implementations; 2) we enable task scheduling with more fine-grained sub-tasks, offering more optimization space; 3) a DAG runtime executor can implement RAD without requiring consistent low-level ML framework versions. To enhance system efficiency, we implement a workload estimator and design an OP-Fence scheduler to cluster devices with similar bandwidths together and partition the DAG to increase throughput. Additionally, we propose an AdaTopK compressor to adaptively compress intermediate activations and gradients at the slowest communication links. To evaluate the convergence and efficiency of our system and algorithms, we train ResNet-101 and GPT-2 on three real-world testbeds using 48 GPUs connected with 8 Mbps to 10 Gbps networks. Experimental results demonstrate that our system and method can achieve a 1.45-9.39x speedup compared to baseline methods while ensuring convergence.
Submitted 16 October, 2024; originally announced October 2024.
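The OP-DAG representation lends itself to a small sketch: nodes are operators, edges are data dependencies, and a runtime executes nodes in topological order. The scheduler, compressor, and RAD machinery are not reproduced; this is a toy illustration only (Python 3.9+ for graphlib).

    # Sketch: execute an operator DAG in dependency order.
    from graphlib import TopologicalSorter

    def run_op_dag(ops, deps, inputs):
        """ops: name -> callable; deps: name -> list of predecessor names."""
        results = dict(inputs)
        for name in TopologicalSorter(deps).static_order():
            if name not in results:               # skip graph inputs
                results[name] = ops[name](*(results[d] for d in deps[name]))
        return results

    # Toy graph computing out = relu(x) + x:
    deps = {"x": [], "relu": ["x"], "out": ["relu", "x"]}
    ops = {"relu": lambda x: max(x, 0.0), "out": lambda a, b: a + b}
    print(run_op_dag(ops, deps, {"x": -2.0})["out"])   # -2.0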
arXiv:2410.10901 (https://arxiv.org/abs/2410.10901) [pdf, other] cs.LG, cs.AI, cs.CL
3DS: Decomposed Difficulty Data Selection's Case Study on LLM Medical Domain Adaptation
Authors: Hongxin Ding, Yue Fang, Runchuan Zhu, Xinke Jiang, Jinyang Zhang, Yongxin Xu, Xu Chu, Junfeng Zhao, Yasha Wang
Abstract: Large Language Models (LLMs) excel in general tasks but struggle in specialized domains like healthcare due to limited domain-specific knowledge. Supervised Fine-Tuning (SFT) data construction for domain adaptation often relies on heuristic methods, such as GPT-4 annotation or manual data selection, with a data-centric focus on presumed diverse, high-quality datasets. However, these methods overlook the model's inherent knowledge distribution, introducing noise, redundancy, and irrelevant data, leading to a mismatch between the selected data and the model's learning task and resulting in suboptimal performance. To address this, we propose a two-stage model-centric data selection framework, Decomposed Difficulty Data Selection (3DS), which aligns data with the model's knowledge distribution for optimized adaptation. In Stage 1, we apply Prompt-Driven Data Selection via Explicit Alignment, where the model filters irrelevant or redundant data based on its internal knowledge.
In Stage 2, we perform Decomposed Difficulty Data Selection, where data selection is guided by our defined difficulty decomposition, using three metrics: Instruction Understanding, Response Confidence, and Response Correctness. Additionally, an attention-based importance weighting mechanism captures token importance for more accurate difficulty calibration. This two-stage approach ensures the selected data is not only aligned with the model's knowledge and preferences but also appropriately challenging for the model to learn, leading to more effective and targeted domain adaptation. In the case study of the medical domain, our extensive experiments on real-world healthcare datasets demonstrate the superiority of 3DS over existing methods in accuracy by over 5.29%. Our dataset and code will be open-sourced at https://anonymous.4open.science/r/3DS-E67F.
Submitted 12 October, 2024; originally announced October 2024.
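A minimal sketch of difficulty-decomposed selection, assuming each of the three named metrics is available as a scoring function in [0, 1]; the equal weighting and the mid-difficulty band are invented here, not the paper's calibration.

    # Sketch: keep examples whose combined difficulty falls in a band that is
    # challenging but learnable for the model.
    def select_by_difficulty(examples, score_fns, band=(0.3, 0.7)):
        """score_fns: three callables (understanding, confidence, correctness)."""
        selected = []
        for ex in examples:
            difficulty = sum(f(ex) for f in score_fns) / len(score_fns)
            if band[0] <= difficulty <= band[1]:
                selected.append(ex)
        return selected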
arXiv:2410.10360 (https://arxiv.org/abs/2410.10360) [pdf, other] cs.CL, cs.IR
Parenting: Optimizing Knowledge Selection of Retrieval-Augmented Language Models with Parameter Decoupling and Tailored Tuning
Authors: Yongxin Xu, Ruizhe Zhang, Xinke Jiang, Yujie Feng, Yuzhen Xiao, Xinyu Ma, Runchuan Zhu, Xu Chu, Junfeng Zhao, Yasha Wang
Abstract: Retrieval-Augmented Generation (RAG) offers an effective solution to the issues faced by Large Language Models (LLMs) in hallucination generation and knowledge obsolescence by incorporating externally retrieved knowledge. However, existing methods lack effective control mechanisms for integrating internal and external knowledge. Inspired by human cognitive processes, we propose Parenting, a novel framework that decouples, identifies, and purposefully optimizes parameter subspaces related to adherence and robustness. Specifically, Parenting utilizes a key parameter mining method that combines forward and backward propagation signals to localize subspaces representing different capabilities. Then, Parenting employs a type-tailored tuning strategy, applying specific and appropriate optimizations to different subspaces, aiming to achieve a balanced enhancement of both adherence and robustness. Extensive experiments on various datasets and models validate the effectiveness and generalizability of our method.
Submitted 20 October, 2024; v1 submitted 14 October, 2024; originally announced October 2024.
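One way to picture "key parameter mining" is sketched below: importance combines a backward signal (gradients) with a forward signal (weights), and the top fraction forms a capability subspace. The weight-times-gradient saliency is an assumption for illustration, not the paper's formula.

    # Sketch: select a boolean mask over parameters as a capability subspace.
    import numpy as np

    def mine_key_parameters(weights, gradients, top_frac=0.05):
        importance = np.abs(weights) * np.abs(gradients)
        k = max(1, int(top_frac * importance.size))
        threshold = np.partition(importance.ravel(), -k)[-k]
        return importance >= threshold     # True = parameter in the subspace

    # Type-tailored tuning would then update only the masked parameters when
    # optimizing one capability (e.g., adherence), leaving the rest frozen.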
arXiv:2410.08799 (https://arxiv.org/abs/2410.08799) [pdf, ps, other] cs.NI, eess.SP
Online Learning for Intelligent Thermal Management of Interference-coupled and Passively Cooled Base Stations
Authors: Zhanwei Yu, Yi Zhao, Xiaoli Chu, Di Yuan
Abstract: Passively cooled base stations (PCBSs) have emerged to deliver better cost and energy efficiency. However, passive cooling necessitates intelligent thermal control via traffic management, i.e., the instantaneous data traffic or throughput of a PCBS directly impacts its thermal performance. This is particularly challenging for outdoor deployment of PCBSs because the heat dissipation efficiency is uncertain and fluctuates over time. What is more, the PCBSs are interference-coupled in multi-cell scenarios. Thus, a higher-throughput PCBS leads to higher interference to the other PCBSs, which, in turn, would require more resource consumption to meet their respective throughput targets. In this paper, we address online decision-making for maximizing the total downlink throughput for a multi-PCBS system subject to constraints on operating temperature. We demonstrate that a reinforcement learning (RL) approach, specifically soft actor-critic (SAC), can successfully perform throughput maximization while keeping the PCBSs cool, by adapting the throughput to time-varying heat dissipation conditions. Furthermore, we design a denial and reward mechanism that effectively mitigates the risk of overheating during the exploration phase of RL. Simulation results show that our approach achieves up to 88.6% of the global optimum. This is very promising, as our approach operates without prior knowledge of future heat dissipation efficiency, which is required by the global optimum.
Submitted 11 October, 2024; originally announced October 2024.
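The denial-and-reward mechanism can be illustrated with a toy action guard: exploratory actions predicted to overheat the station are clipped to a safe level and penalized. The linear thermal model and penalty value are invented for illustration.

    # Sketch: deny unsafe throughput choices during RL exploration.
    def guard_action(throughput, temp, temp_limit, heat_gain=0.1, penalty=-1.0):
        predicted_temp = temp + heat_gain * throughput   # crude thermal model
        if predicted_temp > temp_limit:
            safe_throughput = max(0.0, (temp_limit - temp) / heat_gain)
            return safe_throughput, penalty              # denied + penalized
        return throughput, 0.0                           # allowed, no penalty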
arXiv:2410.07971 (https://arxiv.org/abs/2410.07971) [pdf, other] cs.CV, cs.GR
Generalizable and Animatable Gaussian Head Avatar
Authors: Xuangeng Chu, Tatsuya Harada
Abstract: In this paper, we propose Generalizable and Animatable Gaussian head Avatar (GAGAvatar) for one-shot animatable head avatar reconstruction. Existing methods rely on neural radiance fields, leading to heavy rendering consumption and low reenactment speeds. To address these limitations, we generate the parameters of 3D Gaussians from a single image in a single forward pass. The key innovation of our work is the proposed dual-lifting method, which produces high-fidelity 3D Gaussians that capture identity and facial details. Additionally, we leverage global image features and the 3D morphable model to construct 3D Gaussians for controlling expressions. After training, our model can reconstruct unseen identities without specific optimizations and perform reenactment rendering at real-time speeds. Experiments show that our method exhibits superior performance compared to previous methods in terms of reconstruction quality and expression accuracy. We believe our method can establish new benchmarks for future research and advance applications of digital avatars. Code and demos are available at https://github.com/xg-chu/GAGAvatar.
Submitted 10 October, 2024; originally announced October 2024.
Comments: NeurIPS 2024; code is available at https://github.com/xg-chu/GAGAvatar; more demos are available at https://xg-chu.site/project_gagavatar
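The output side of a feed-forward Gaussian head model can be sketched as a single linear map from image features to per-point Gaussian parameters; the layer shape and the 11-number parameterization below are assumptions for illustration, not GAGAvatar's architecture.

    # Sketch: one forward pass predicts 3D Gaussian parameters.
    import numpy as np

    def predict_gaussians(features, W, n_points):
        """features: (d,); W: (d, n_points * 11) learned weights."""
        g = (features @ W).reshape(n_points, 11)
        quat = g[:, 7:11]
        return {
            "position": g[:, 0:3],
            "scale": np.exp(g[:, 3:6]),                   # positive scales
            "opacity": 1.0 / (1.0 + np.exp(-g[:, 6])),    # sigmoid to (0, 1)
            "rotation": quat / (np.linalg.norm(quat, axis=1, keepdims=True) + 1e-8),
        }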
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NeurIPS 2024, code is available at https://github.com/xg-chu/GAGAvatar, more demos are available at https://xg-chu.site/project_gagavatar</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.04808">arXiv:2410.04808</a> <span> [<a href="https://arxiv.org/pdf/2410.04808">pdf</a>, <a href="https://arxiv.org/format/2410.04808">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> LPZero: Language Model Zero-cost Proxy Search from Zero </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Dong%2C+P">Peijie Dong</a>, <a href="/search/cs?searchtype=author&query=Li%2C+L">Lujun Li</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xiang Liu</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+Z">Zhenheng Tang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xuebo Liu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Q">Qiang Wang</a>, <a href="/search/cs?searchtype=author&query=Chu%2C+X">Xiaowen Chu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.04808v1-abstract-short" style="display: inline;"> In spite of the outstanding performance, Neural Architecture Search (NAS) is criticized for massive computation. Recently, Zero-shot NAS has emerged as a promising approach by exploiting Zero-cost (ZC) proxies, which markedly reduce computational demands. Despite this, existing ZC proxies heavily rely on expert knowledge and incur significant trial-and-error costs. Particularly in NLP tasks, most… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04808v1-abstract-full').style.display = 'inline'; document.getElementById('2410.04808v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.04808v1-abstract-full" style="display: none;"> In spite of the outstanding performance, Neural Architecture Search (NAS) is criticized for massive computation. Recently, Zero-shot NAS has emerged as a promising approach by exploiting Zero-cost (ZC) proxies, which markedly reduce computational demands. Despite this, existing ZC proxies heavily rely on expert knowledge and incur significant trial-and-error costs. Particularly in NLP tasks, most existing ZC proxies fail to surpass the performance of the naive baseline. To address these challenges, we introduce a novel framework, \textbf{LPZero}, which is the first to automatically design ZC proxies for various tasks, achieving higher ranking consistency than human-designed proxies. Specifically, we model the ZC proxy as a symbolic equation and incorporate a unified proxy search space that encompasses existing ZC proxies, which are composed of a predefined set of mathematical symbols. To heuristically search for the best ZC proxy, LPZero incorporates genetic programming to find the optimal symbolic composition. 
We propose a Rule-based Pruning Strategy (RPS), which preemptively eliminates unpromising proxies, thereby mitigating the risk of proxy degradation. Extensive experiments on FlexiBERT, GPT-2, and LLaMA-7B demonstrate LPZero's superior ranking ability and performance on downstream tasks compared to current approaches.
Submitted 7 October, 2024; originally announced October 2024.
Comments: 8 pages, 7 figures, 10 appendix pages
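A minimal stand-in for the genetic-programming search: a proxy is a tiny symbolic expression over per-model statistics, improved by a (1+1) mutation hill-climb and scored by the fraction of concordant ranking pairs. The ops, statistics, and fitness measure are toy assumptions, not LPZero's search space.

    # Sketch: evolve a symbolic zero-cost proxy by mutation.
    import random

    UNARY = {"id": lambda x: x, "sq": lambda x: x * x, "abs": abs}
    BINARY = {"add": lambda a, b: a + b, "mul": lambda a, b: a * b}

    def make_proxy(genome):
        u1, u2, b, k1, k2 = genome
        return lambda stats: BINARY[b](UNARY[u1](stats[k1]), UNARY[u2](stats[k2]))

    def mutate(genome, keys):
        g = list(genome)
        i = random.randrange(5)
        pools = [list(UNARY), list(UNARY), list(BINARY), keys, keys]
        g[i] = random.choice(pools[i])
        return tuple(g)

    def fitness(genome, archs, accs):
        # Ranking consistency proxy: fraction of concordant pairs.
        scores = [make_proxy(genome)(a) for a in archs]
        pairs = [(i, j) for i in range(len(archs)) for j in range(i + 1, len(archs))]
        good = sum((scores[i] - scores[j]) * (accs[i] - accs[j]) > 0 for i, j in pairs)
        return good / len(pairs)

    def search(archs, accs, keys, steps=200):
        genome = ("id", "id", "add", keys[0], keys[-1])
        best = fitness(genome, archs, accs)
        for _ in range(steps):
            cand = mutate(genome, keys)
            f = fitness(cand, archs, accs)
            if f >= best:
                genome, best = cand, f
        return genome, best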
arXiv:2410.04199 (https://arxiv.org/abs/2410.04199) [pdf, other] cs.CL, cs.AI
LongGenBench: Long-context Generation Benchmark
Authors: Xiang Liu, Peijie Dong, Xuming Hu, Xiaowen Chu
Abstract: Current long-context benchmarks primarily focus on retrieval-based tests, requiring Large Language Models (LLMs) to locate specific information within extensive input contexts, such as the needle-in-a-haystack (NIAH) benchmark. Long-context generation refers to the ability of a language model to generate coherent and contextually accurate text that spans across lengthy passages or documents. While recent studies show strong performance on NIAH and other retrieval-based long-context benchmarks, there is a significant lack of benchmarks for evaluating long-context generation capabilities. To bridge this gap and offer a comprehensive assessment, we introduce a synthetic benchmark, LongGenBench, which allows for flexible configurations of customized generation context lengths. LongGenBench advances beyond traditional benchmarks by redesigning the format of questions and necessitating that LLMs respond with a single, cohesive long-context answer. Upon extensive evaluation using LongGenBench, we observe that: (1) both API-accessed and open-source models exhibit performance degradation in long-context generation scenarios, ranging from 1.2% to 47.1%; (2) different series of LLMs exhibit varying trends of performance degradation, with the Gemini-1.5-Flash model showing the least degradation among API-accessed models, and the Qwen2 series exhibiting the least degradation in LongGenBench among open-source models.
Submitted 24 October, 2024; v1 submitted 5 October, 2024; originally announced October 2024.
Comments: EMNLP 2024. https://github.com/Dominic789654/LongGenBench
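A LongGenBench-style task can be pictured as packing many sub-questions into one prompt and requiring a single cohesive answer; the prompt format and the substring-based scoring below are assumptions for illustration.

    # Sketch: build a packed prompt and score one long response.
    def build_prompt(questions):
        numbered = "\n".join(f"{i + 1}. {q}" for i, q in enumerate(questions))
        return ("Answer every question below in one single, cohesive response, "
                "keeping the numbering:\n" + numbered)

    def score_response(response, expected_answers):
        found = sum(ans.lower() in response.lower() for ans in expected_answers)
        return found / len(expected_answers)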
arXiv:2409.19597 (https://arxiv.org/abs/2409.19597) [pdf, other] cs.RO
CELLmap: Enhancing LiDAR SLAM through Elastic and Lightweight Spherical Map Representation
Authors: Yifan Duan, Xinran Zhang, Yao Li, Guoliang You, Xiaomeng Chu, Jianmin Ji, Yanyong Zhang
Abstract: SLAM is a fundamental capability of unmanned systems, with LiDAR-based SLAM gaining widespread adoption due to its high precision. Current SLAM systems can achieve centimeter-level accuracy within a short period. However, there are still several challenges when dealing with large-scale mapping tasks, including significant storage requirements and difficulty of reusing the constructed maps. To address this, we first design an elastic and lightweight map representation called CELLmap, composed of several CELLs, each representing the local map at the corresponding location. Then, we design a general backend, including a CELL-based bidirectional registration module and a loop closure detection module, to improve global map consistency. Our experiments have demonstrated that CELLmap can represent the precise geometric structure of large-scale maps of the KITTI dataset using only about 60 MB. Additionally, our general backend achieves up to a 26.88% improvement over various LiDAR odometry methods.
Submitted 29 September, 2024; originally announced September 2024.
Comments: 7 pages, 5 figures
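The CELL idea can be sketched as bucketing points into fixed-size cells keyed by grid location, so each local map can be stored, loaded, and updated independently; the cell size and contents here are invented for illustration.

    # Sketch: a cell-based local map store.
    from collections import defaultdict

    class CellMap:
        def __init__(self, cell_size=10.0):
            self.cell_size = cell_size
            self.cells = defaultdict(list)   # (i, j, k) -> points

        def _key(self, point):
            return tuple(int(c // self.cell_size) for c in point)

        def insert(self, points):
            for p in points:
                self.cells[self._key(p)].append(p)

        def local_map(self, center, radius_cells=1):
            # Gather the cells around a query location, e.g., for registration.
            ci, cj, ck = self._key(center)
            span = range(-radius_cells, radius_cells + 1)
            out = []
            for di in span:
                for dj in span:
                    for dk in span:
                        out.extend(self.cells.get((ci + di, cj + dj, ck + dk), []))
            return out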
arXiv:2409.14170 (https://arxiv.org/abs/2409.14170) [pdf, other] cs.CV
LFP: Efficient and Accurate End-to-End Lane-Level Planning via Camera-LiDAR Fusion
Authors: Guoliang You, Xiaomeng Chu, Yifan Duan, Xingchen Li, Sha Zhang, Jianmin Ji, Yanyong Zhang
Abstract: Multi-modal systems enhance performance in autonomous driving but face inefficiencies due to indiscriminate processing within each modality. Additionally, the independent feature learning of each modality lacks interaction, which results in extracted features that do not possess complementary characteristics. These issues increase the cost of fusing redundant information across modalities. To address these challenges, we propose targeting driving-relevant elements, which reduces the volume of LiDAR features while preserving critical information. This approach enhances lane-level interaction between the image and LiDAR branches, allowing for the extraction and fusion of their respective advantageous features. Building upon the camera-only framework PHP, we introduce the Lane-level camera-LiDAR Fusion Planning (LFP) method, which balances efficiency with performance by using lanes as the unit for sensor fusion. Specifically, we design three modules to enhance efficiency and performance. For efficiency, we propose an image-guided coarse lane prior generation module that forecasts the region of interest (ROI) for lanes and assigns a confidence score, guiding LiDAR processing. The LiDAR feature extraction module leverages lane-aware priors from the image branch to guide pillar sampling, retaining essential pillars. For performance, the lane-level cross-modal query integration and feature enhancement module uses the confidence scores from the ROI to combine low-confidence image queries with LiDAR queries, extracting complementary depth features. These features enhance the low-confidence image features, compensating for the lack of depth. Experiments on the CARLA benchmarks show that our method achieves state-of-the-art performance in both driving score and infraction score, with maximum improvements of 15% and 14% over existing algorithms, respectively, while maintaining a high frame rate of 19.27 FPS.
Submitted 21 September, 2024; originally announced September 2024.
Comments: 8 pages
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.11694">arXiv:2409.11694</a> <span> [<a href="https://arxiv.org/pdf/2409.11694">pdf</a>, <a href="https://arxiv.org/format/2409.11694">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> From Words to Wheels: Automated Style-Customized Policy Generation for Autonomous Driving </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Han%2C+X">Xu Han</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xianda Chen</a>, <a href="/search/cs?searchtype=author&query=Cai%2C+Z">Zhenghan Cai</a>, <a href="/search/cs?searchtype=author&query=Cai%2C+P">Pinlong Cai</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+M">Meixin Zhu</a>, <a href="/search/cs?searchtype=author&query=Chu%2C+X">Xiaowen Chu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.11694v1-abstract-short" style="display: inline;"> Autonomous driving technology has witnessed rapid advancements, with foundation models improving interactivity and user experiences. However, current autonomous vehicles (AVs) face significant limitations in delivering command-based driving styles. Most existing methods either rely on predefined driving styles that require expert input or use data-driven techniques like Inverse Reinforcement Learn… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.11694v1-abstract-full').style.display = 'inline'; document.getElementById('2409.11694v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.11694v1-abstract-full" style="display: none;"> Autonomous driving technology has witnessed rapid advancements, with foundation models improving interactivity and user experiences. However, current autonomous vehicles (AVs) face significant limitations in delivering command-based driving styles. Most existing methods either rely on predefined driving styles that require expert input or use data-driven techniques like Inverse Reinforcement Learning to extract styles from driving data. These approaches, though effective in some cases, face challenges: difficulty obtaining specific driving data for style matching (e.g., in Robotaxis), inability to align driving style metrics with user preferences, and limitations to pre-existing styles, restricting customization and generalization to new commands. This paper introduces Words2Wheels, a framework that automatically generates customized driving policies based on natural language user commands. Words2Wheels employs a Style-Customized Reward Function to generate a Style-Customized Driving Policy without relying on prior driving data. By leveraging large language models and a Driving Style Database, the framework efficiently retrieves, adapts, and generalizes driving styles. A Statistical Evaluation module ensures alignment with user preferences. 
Experimental results demonstrate that Words2Wheels outperforms existing methods in accuracy, generalization, and adaptability, offering a novel solution for customized AV driving behavior. Code and demo available at https://yokhon.github.io/Words2Wheels/.
Submitted 18 September, 2024; originally announced September 2024.
Comments: 6 pages, 7 figures
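The reward side of the pipeline can be illustrated with a toy lookup in place of the LLM and Driving Style Database: a command selects weights over interpretable driving terms, which then shape an RL reward. The weights and terms are invented for illustration.

    # Sketch: style-customized reward from a natural-language command.
    STYLE_WEIGHTS = {
        "aggressive": {"speed": 1.0, "comfort": 0.1, "headway": 0.2},
        "cautious":   {"speed": 0.3, "comfort": 1.0, "headway": 1.0},
    }

    def style_reward(command, speed_norm, jerk_norm, headway_norm):
        """All inputs normalized to [0, 1]; less jerk means more comfort."""
        w = STYLE_WEIGHTS.get(command, STYLE_WEIGHTS["cautious"])
        return (w["speed"] * speed_norm
                + w["comfort"] * (1.0 - jerk_norm)
                + w["headway"] * headway_norm)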
arXiv:2409.08949 (https://arxiv.org/abs/2409.08949) [pdf, other] cs.DC, cs.AR
Generic and ML Workloads in an HPC Datacenter: Node Energy, Job Failures, and Node-Job Analysis
Authors: Xiaoyu Chu, Daniel Hofstätter, Shashikant Ilager, Sacheendra Talluri, Duncan Kampert, Damian Podareanu, Dmitry Duplyakin, Ivona Brandic, Alexandru Iosup
Abstract: HPC datacenters offer a backbone to the modern digital society. Increasingly, they run Machine Learning (ML) jobs next to generic, compute-intensive workloads, supporting science, business, and other decision-making processes. However, understanding how ML jobs impact the operation of HPC datacenters, relative to generic jobs, remains desirable but understudied. In this work, we leverage long-term operational data, collected from a national-scale production HPC datacenter, and statistically compare how ML and generic jobs can impact the performance, failures, resource utilization, and energy consumption of HPC datacenters. Our study provides key insights, e.g., ML-related power usage causes GPU nodes to run into temperature limitations, median/mean runtime and failure rates are higher for ML jobs than for generic jobs, both ML and generic jobs exhibit highly variable arrival processes and resource demands, significant amounts of energy are spent on unsuccessfully terminating jobs, and concurrent jobs tend to terminate in the same state. We open-source our cleaned-up data traces on Zenodo (https://doi.org/10.5281/zenodo.13685426), and provide our analysis toolkit as software hosted on GitHub (https://github.com/atlarge-research/2024-icpads-hpc-workload-characterization). This study offers multiple benefits for data center administrators, who can improve operational efficiency, and for researchers, who can further improve system designs, scheduling techniques, etc.
Submitted 13 September, 2024; originally announced September 2024.
Comments: 10 pages, 10 figures, 6 tables, ICPADS 2024
arXiv:2408.15484 (https://arxiv.org/abs/2408.15484) [pdf, other] cs.CV
NAS-BNN: Neural Architecture Search for Binary Neural Networks
Authors: Zhihao Lin, Yongtao Wang, Jinhe Zhang, Xiaojie Chu, Haibin Ling
Abstract: Binary Neural Networks (BNNs) have gained extensive attention for their superior inference efficiency and compression ratio compared to traditional full-precision networks. However, due to the unique characteristics of BNNs, designing a powerful binary architecture is challenging and often requires significant manpower. A promising solution is to utilize Neural Architecture Search (NAS) to assist in designing BNNs, but current NAS methods for BNNs are relatively straightforward and leave a performance gap between the searched models and manually designed ones. To address this gap, we propose a novel neural architecture search scheme for binary neural networks, named NAS-BNN. We first carefully design a search space based on the unique characteristics of BNNs. Then, we present three training strategies, which significantly enhance the training of the supernet and boost the performance of all subnets. Our discovered binary model family outperforms previous BNNs for a wide range of operations (OPs) from 20M to 200M. For instance, we achieve 68.20% top-1 accuracy on ImageNet with only 57M OPs. In addition, we validate the transferability of these searched BNNs on the object detection task, and our binary detectors with the searched BNNs achieve a new state-of-the-art result, e.g., 31.6% mAP with 370M OPs, on the MS COCO dataset. The source code and models will be released at https://github.com/VDIGPKU/NAS-BNN.
Submitted 27 August, 2024; originally announced August 2024.
Comments: 23 pages
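Supernet-based search of this kind typically samples subnets under a resource budget; the per-layer choices and the toy OPs estimate below are assumptions for illustration, not NAS-BNN's search space.

    # Sketch: rejection-sample a subnet that fits an OPs budget.
    import random

    CHOICES = {"width": [16, 32, 64], "kernel": [1, 3, 5]}

    def sample_subnet(n_layers):
        return [{k: random.choice(v) for k, v in CHOICES.items()}
                for _ in range(n_layers)]

    def estimate_ops(subnet, spatial=32 * 32):
        # Toy estimate: width^2 * kernel^2 * spatial positions per layer.
        return sum(l["width"] ** 2 * l["kernel"] ** 2 * spatial for l in subnet)

    def sample_under_budget(n_layers, max_ops, tries=1000):
        for _ in range(tries):
            net = sample_subnet(n_layers)
            if estimate_ops(net) <= max_ops:
                return net
        raise RuntimeError("no subnet found under the OPs budget")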
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">23 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.14736">arXiv:2408.14736</a> <span> [<a href="https://arxiv.org/pdf/2408.14736">pdf</a>, <a href="https://arxiv.org/format/2408.14736">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3673038.3673142">10.1145/3673038.3673142 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Bandwidth-Aware and Overlap-Weighted Compression for Communication-Efficient Federated Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tang%2C+Z">Zichen Tang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+J">Junlin Huang</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+R">Rudan Yan</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yuxin Wang</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+Z">Zhenheng Tang</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+S">Shaohuai Shi</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+A+C">Amelie Chi Zhou</a>, <a href="/search/cs?searchtype=author&query=Chu%2C+X">Xiaowen Chu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.14736v1-abstract-short" style="display: inline;"> Current data compression methods, such as sparsification in Federated Averaging (FedAvg), effectively enhance the communication efficiency of Federated Learning (FL). However, these methods encounter challenges such as the straggler problem and diminished model performance due to heterogeneous bandwidth and non-IID (Independently and Identically Distributed) data. To address these issues, we intro… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.14736v1-abstract-full').style.display = 'inline'; document.getElementById('2408.14736v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.14736v1-abstract-full" style="display: none;"> Current data compression methods, such as sparsification in Federated Averaging (FedAvg), effectively enhance the communication efficiency of Federated Learning (FL). However, these methods encounter challenges such as the straggler problem and diminished model performance due to heterogeneous bandwidth and non-IID (Independently and Identically Distributed) data. To address these issues, we introduce a bandwidth-aware compression framework for FL, aimed at improving communication efficiency while mitigating the problems associated with non-IID data. 
First, our strategy dynamically adjusts compression ratios according to bandwidth, enabling clients to upload their models at a similar pace, thus exploiting otherwise wasted time to transmit more data. Second, we identify the non-overlapped pattern of retained parameters after compression, which results in diminished client update signals due to uniformly averaged weights. Based on this finding, we propose a parameter mask to adjust the client-averaging coefficients at the parameter level, thereby more closely approximating the original updates and improving training convergence in heterogeneous environments. Our evaluations reveal that our method significantly boosts model accuracy, with a maximum improvement of 13% over the uncompressed FedAvg. Moreover, it achieves a $3.37\times$ speedup in reaching the target accuracy compared to FedAvg with a Top-K compressor, demonstrating its effectiveness in accelerating convergence with compression. The integration of common compression techniques into our framework further establishes its potential as a versatile foundation for future cross-device, communication-efficient FL research, addressing critical challenges in FL and advancing the field of distributed machine learning. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.13073">arXiv:2408.13073</a> <span> [<a href="https://arxiv.org/pdf/2408.13073">pdf</a>, <a href="https://arxiv.org/format/2408.13073">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> IntelliCare: Improving Healthcare Analysis with Variance-Controlled Patient-Level Knowledge from Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yu%2C+Z">Zhihao Yu</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+Y">Yujie Jin</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Y">Yongxin Xu</a>, <a href="/search/cs?searchtype=author&query=Chu%2C+X">Xu Chu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yasha Wang</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+J">Junfeng Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2408.13073v1-abstract-full"> While pioneering deep learning methods have made great strides in analyzing electronic health record (EHR) data, they often struggle to fully capture the semantics of diverse medical codes from limited data. The integration of external knowledge from Large Language Models (LLMs) presents a promising avenue for improving healthcare predictions.
However, LLM analyses may exhibit significant variance due to ambiguity problems and inconsistency issues, hindering their effective utilization. To address these challenges, we propose IntelliCare, a novel framework that leverages LLMs to provide high-quality patient-level external knowledge and enhance existing EHR models. Concretely, IntelliCare identifies patient cohorts and employs task-relevant statistical information to augment LLM understanding and generation, effectively mitigating the ambiguity problem. Additionally, it refines LLM-derived knowledge through a hybrid approach, generating multiple analyses and calibrating them using both the EHR model and perplexity measures. Experimental evaluations on three clinical prediction tasks across two large-scale EHR datasets demonstrate that IntelliCare delivers significant performance improvements to existing methods, highlighting its potential in advancing personalized healthcare predictions and decision support systems. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024.
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.09857">arXiv:2408.09857</a> <span> [<a href="https://arxiv.org/pdf/2408.09857">pdf</a>, <a href="https://arxiv.org/format/2408.09857">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> TaSL: Continual Dialog State Tracking via Task Skill Localization and Consolidation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Feng%2C+Y">Yujie Feng</a>, <a href="/search/cs?searchtype=author&query=Chu%2C+X">Xu Chu</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Y">Yongxin Xu</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+G">Guangyuan Shi</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+B">Bo Liu</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+X">Xiao-Ming Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.09857v1-abstract-short" style="display: inline;"> A practical dialogue system requires the capacity for ongoing skill acquisition and adaptability to new tasks while preserving prior knowledge. However, current methods for Continual Dialogue State Tracking (DST), a crucial function of dialogue systems, struggle with the catastrophic forgetting issue and knowledge transfer between tasks. We present TaSL, a novel framework for task skill localizati… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.09857v1-abstract-full').style.display = 'inline'; document.getElementById('2408.09857v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.09857v1-abstract-full" style="display: none;"> A practical dialogue system requires the capacity for ongoing skill acquisition and adaptability to new tasks while preserving prior knowledge. However, current methods for Continual Dialogue State Tracking (DST), a crucial function of dialogue systems, struggle with the catastrophic forgetting issue and knowledge transfer between tasks. We present TaSL, a novel framework for task skill localization and consolidation that enables effective knowledge transfer without relying on memory replay. TaSL uses a novel group-wise technique to pinpoint task-specific and task-shared areas. Additionally, a fine-grained skill consolidation strategy protects task-specific knowledge from being forgotten while updating shared knowledge for bi-directional knowledge transfer. As a result, TaSL strikes a balance between preserving previous knowledge and excelling at new tasks. Comprehensive experiments on various backbones highlight the significant performance improvements of TaSL over existing state-of-the-art methods. The source code is provided for reproducibility. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.09857v1-abstract-full').style.display = 'none'; document.getElementById('2408.09857v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to ACL 2024 Main Conference. arXiv admin note: text overlap with arXiv:2408.05200</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.09199">arXiv:2408.09199</a> <span> [<a href="https://arxiv.org/pdf/2408.09199">pdf</a>, <a href="https://arxiv.org/format/2408.09199">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> TC-RAG:Turing-Complete RAG's Case study on Medical LLM Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jiang%2C+X">Xinke Jiang</a>, <a href="/search/cs?searchtype=author&query=Fang%2C+Y">Yue Fang</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+R">Rihong Qiu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Haoyu Zhang</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Y">Yongxin Xu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+H">Hao Chen</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+W">Wentao Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Ruizhe Zhang</a>, <a href="/search/cs?searchtype=author&query=Fang%2C+Y">Yuchen Fang</a>, <a href="/search/cs?searchtype=author&query=Chu%2C+X">Xu Chu</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+J">Junfeng Zhao</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yasha Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.09199v1-abstract-short" style="display: inline;"> In the pursuit of enhancing domain-specific Large Language Models (LLMs), Retrieval-Augmented Generation (RAG) emerges as a promising solution to mitigate issues such as hallucinations, outdated knowledge, and limited expertise in highly specialized queries. However, existing approaches to RAG fall short by neglecting system state variables, which are crucial for ensuring adaptive control, retriev… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.09199v1-abstract-full').style.display = 'inline'; document.getElementById('2408.09199v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.09199v1-abstract-full" style="display: none;"> In the pursuit of enhancing domain-specific Large Language Models (LLMs), Retrieval-Augmented Generation (RAG) emerges as a promising solution to mitigate issues such as hallucinations, outdated knowledge, and limited expertise in highly specialized queries. However, existing approaches to RAG fall short by neglecting system state variables, which are crucial for ensuring adaptive control, retrieval halting, and system convergence. In this paper, we introduce the TC-RAG through rigorous proof, a novel framework that addresses these challenges by incorporating a Turing Complete System to manage state variables, thereby enabling more efficient and accurate knowledge retrieval. 
By leveraging a memory stack system with adaptive retrieval, reasoning, and planning capabilities, TC-RAG not only ensures the controlled halting of retrieval processes but also mitigates the accumulation of erroneous knowledge via Push and Pop actions. In a case study in the medical domain, our extensive experiments on real-world healthcare datasets demonstrate the superiority of TC-RAG over existing methods, with accuracy gains of over 7.20\%. Our dataset and code are available at https://github.com/Artessay/SAMA.git. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">version 1.0</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.08147">arXiv:2408.08147</a> <span> [<a href="https://arxiv.org/pdf/2408.08147">pdf</a>, <a href="https://arxiv.org/format/2408.08147">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> P/D-Serve: Serving Disaggregated Large Language Model at Scale </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jin%2C+Y">Yibo Jin</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+T">Tao Wang</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+H">Huimin Lin</a>, <a href="/search/cs?searchtype=author&query=Song%2C+M">Mingyang Song</a>, <a href="/search/cs?searchtype=author&query=Li%2C+P">Peiyang Li</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+Y">Yipeng Ma</a>, <a href="/search/cs?searchtype=author&query=Shan%2C+Y">Yicheng Shan</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+Z">Zhengfan Yuan</a>, <a href="/search/cs?searchtype=author&query=Li%2C+C">Cailong Li</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+Y">Yajing Sun</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+T">Tiandeng Wu</a>, <a href="/search/cs?searchtype=author&query=Chu%2C+X">Xing Chu</a>, <a href="/search/cs?searchtype=author&query=Huan%2C+R">Ruizhi Huan</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+L">Li Ma</a>, <a href="/search/cs?searchtype=author&query=You%2C+X">Xiao You</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+W">Wenting Zhou</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+Y">Yunpeng Ye</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+W">Wen Liu</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+X">Xiangkun Xu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yongsheng Zhang</a>, <a href="/search/cs?searchtype=author&query=Dong%2C+T">Tiantian Dong</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+J">Jiawei
Zhu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zhe Wang</a>, <a href="/search/cs?searchtype=author&query=Ju%2C+X">Xijian Ju</a>, <a href="/search/cs?searchtype=author&query=Song%2C+J">Jianxun Song</a>, et al. (5 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2408.08147v1-abstract-full"> Serving disaggregated large language models (LLMs) over tens of thousands of xPU devices (GPUs or NPUs) with reliable performance faces multiple challenges. 1) Ignoring the diversity (various prefixes and tidal requests), treating all prompts in a single mixed pool is inadequate. To exploit the similarity within each scenario and minimize the mismatch in P/D (prefill and decoding) processing, fine-grained organization is required, dynamically adjusting P/D ratios for better performance. 2) Due to inaccurate workload estimation (queue status or maintained connections), the global scheduler easily incurs unnecessary timeouts in prefill. 3) Block-fixed device-to-device (D2D) KVCache transfer over cluster-level RDMA (remote direct memory access) fails to achieve the desired D2D utilization. To overcome these problems, this paper proposes P/D-Serve, an end-to-end system complying with the paradigm of MLOps (machine learning operations), which models end-to-end (E2E) P/D performance and enables: 1) fine-grained P/D organization, mapping the service with RoCE (RDMA over converged ethernet) as needed, to facilitate similar processing and dynamic adjustments on P/D ratios; 2) on-demand forwarding upon rejections for idle prefill, decoupling the scheduler from regular inaccurate reports and local queues, to avoid timeouts in prefill; and 3) efficient KVCache transfer via optimized D2D access. P/D-Serve is implemented upon Ascend and MindSpore, has been deployed over tens of thousands of NPUs for more than eight months in commercial use, and achieves 60\%, 42\% and 46\% improvements in E2E throughput, time-to-first-token (TTFT) SLO (service level objective) and D2D transfer time, respectively. With these end-to-end optimizations, P/D-Serve achieves a 6.7x increase in throughput compared with aggregated LLMs. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024.
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.08015">arXiv:2408.08015</a> <span> [<a href="https://arxiv.org/pdf/2408.08015">pdf</a>, <a href="https://arxiv.org/format/2408.08015">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> Asteroid: Resource-Efficient Hybrid Pipeline Parallelism for Collaborative DNN Training on Heterogeneous Edge Devices </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ye%2C+S">Shengyuan Ye</a>, <a href="/search/cs?searchtype=author&query=Zeng%2C+L">Liekang Zeng</a>, <a href="/search/cs?searchtype=author&query=Chu%2C+X">Xiaowen Chu</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+G">Guoliang Xing</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xu Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.08015v1-abstract-short" style="display: inline;"> On-device Deep Neural Network (DNN) training has been recognized as crucial for privacy-preserving machine learning at the edge. However, the intensive training workload and limited onboard computing resources pose significant challenges to the availability and efficiency of model training. While existing works address these challenges through native resource management optimization, we instead le… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.08015v1-abstract-full').style.display = 'inline'; document.getElementById('2408.08015v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.08015v1-abstract-full" style="display: none;"> On-device Deep Neural Network (DNN) training has been recognized as crucial for privacy-preserving machine learning at the edge. However, the intensive training workload and limited onboard computing resources pose significant challenges to the availability and efficiency of model training. While existing works address these challenges through native resource management optimization, we instead leverage our observation that edge environments usually comprise a rich set of accompanying trusted edge devices with idle resources beyond a single terminal. We propose Asteroid, a distributed edge training system that breaks the resource walls across heterogeneous edge devices for efficient model training acceleration. Asteroid adopts a hybrid pipeline parallelism to orchestrate distributed training, along with a judicious parallelism planning for maximizing throughput under certain resource constraints. 
Furthermore, a fault-tolerant yet lightweight pipeline replay mechanism is developed to tame the device-level dynamics for training robustness and performance stability. We implement Asteroid on heterogeneous edge devices with both vision and language models, and our evaluations demonstrate up to 12.2x faster training than conventional parallelism methods and 2.1x faster training than state-of-the-art hybrid parallelism methods. Furthermore, Asteroid can recover the training pipeline 14x faster than baseline methods while preserving comparable throughput despite unexpected device exits and failures. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by The 30th Annual International Conference on Mobile Computing and Networking (MobiCom'24)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.05200">arXiv:2408.05200</a> <span> [<a href="https://arxiv.org/pdf/2408.05200">pdf</a>, <a href="https://arxiv.org/format/2408.05200">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> TaSL: Task Skill Localization and Consolidation for Language Model Continual Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Feng%2C+Y">Yujie Feng</a>, <a href="/search/cs?searchtype=author&query=Chu%2C+X">Xu Chu</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Y">Yongxin Xu</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+Z">Zexin Lu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+B">Bo Liu</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+P+S">Philip S. Yu</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+X">Xiao-Ming Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2408.05200v2-abstract-full"> Language model continual learning (CL) has recently attracted significant interest for its ability to adapt large language models (LLMs) to dynamic real-world scenarios without retraining. A major challenge in this domain is catastrophic forgetting, where models lose previously acquired knowledge upon learning new tasks.
Existing approaches commonly utilize multiple parameter-efficient fine-tuning (PEFT) blocks to acquire task-specific knowledge, yet these methods are inefficient and fail to leverage potential knowledge transfer across tasks. In this paper, we introduce a novel CL framework for language models, named Task Skill Localization and Consolidation (TaSL), which boosts knowledge transfer without depending on memory replay. TaSL initially segregates the model into 'skill units' based on parameter dependencies, allowing for more precise control. Subsequently, it employs a novel group-wise skill localization technique to ascertain the importance distribution of skill units for a new task. By comparing this importance distribution with those from previous tasks, we implement a fine-grained skill consolidation strategy that retains task-specific knowledge, thereby preventing forgetting, and updates task-shared knowledge, which facilitates bi-directional knowledge transfer. As a result, TaSL achieves an optimal balance between retaining prior knowledge and excelling in new tasks. TaSL also demonstrates strong generalizability, making it suitable for various base models and adaptable to PEFT methods like LoRA. Furthermore, it offers notable extensibility, supporting enhancements through integration with memory replay techniques. Comprehensive experiments conducted on two CL benchmarks, involving models ranging from 220M to 7B parameters, affirm the effectiveness of TaSL and its variants across different settings. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Extension of ACL 2024 paper titled: Continual Dialog State Tracking via Task Skill Localization and Consolidation</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.05008">arXiv:2408.05008</a> <span> [<a href="https://arxiv.org/pdf/2408.05008">pdf</a>, <a href="https://arxiv.org/format/2408.05008">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> FlowDreamer: Exploring High Fidelity Text-to-3D Generation via Rectified Flow </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+H">Hangyu Li</a>, <a href="/search/cs?searchtype=author&query=Chu%2C+X">Xiangxiang Chu</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+D">Dingyuan Shi</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+W">Wang Lin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.05008v3-abstract-short" style="display: inline;"> Recent advances in text-to-3D generation have made significant progress. In particular, with the pretrained diffusion models, existing methods predominantly use Score Distillation Sampling (SDS) to train 3D models such as Neural RaRecent advances in text-to-3D generation have made significant progress. In particular, with the pretrained diffusion models, existing methods predominantly use Score Di… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.05008v3-abstract-full').style.display = 'inline'; document.getElementById('2408.05008v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.05008v3-abstract-full" style="display: none;"> Recent advances in text-to-3D generation have made significant progress. In particular, with the pretrained diffusion models, existing methods predominantly use Score Distillation Sampling (SDS) to train 3D models such as Neural RaRecent advances in text-to-3D generation have made significant progress. In particular, with the pretrained diffusion models, existing methods predominantly use Score Distillation Sampling (SDS) to train 3D models such as Neural Radiance Fields (NeRF) and 3D Gaussian Splatting (3D GS). However, a hurdle is that they often encounter difficulties with over-smoothing textures and over-saturating colors. The rectified flow model -- which utilizes a simple ordinary differential equation (ODE) to represent a straight trajectory -- shows promise as an alternative prior to text-to-3D generation. It learns a time-independent vector field, thereby reducing the ambiguity in 3D model update gradients that are calculated using time-dependent scores in the SDS framework. In light of this, we first develop a mathematical analysis to seamlessly integrate SDS with rectified flow model, paving the way for our initial framework known as Vector Field Distillation Sampling (VFDS). However, empirical findings indicate that VFDS still results in over-smoothing outcomes. 
Therefore, we analyze the underlying reasons for this failure from the perspective of ODE trajectories. Building on this analysis, we propose a novel framework, named FlowDreamer, which yields high-fidelity results with richer textural details and faster convergence. The key insight is to leverage the coupling and reversible properties of the rectified flow model to search for the corresponding noise, rather than using randomly sampled noise as in VFDS. Accordingly, we introduce a novel Unique Couple Matching (UCM) loss, which guides the 3D model to optimize along the same trajectory. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Tech Report</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.04637">arXiv:2408.04637</a> <span> [<a href="https://arxiv.org/pdf/2408.04637">pdf</a>, <a href="https://arxiv.org/format/2408.04637">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> APE: Active Learning-based Tooling for Finding Informative Few-shot Examples for LLM-based Entity Matching </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Qian%2C+K">Kun Qian</a>, <a href="/search/cs?searchtype=author&query=Sang%2C+Y">Yisi Sang</a>, <a href="/search/cs?searchtype=author&query=Bayat%2C+F+F">Farima Fatahi Bayat</a>, <a href="/search/cs?searchtype=author&query=Belyi%2C+A">Anton Belyi</a>, <a href="/search/cs?searchtype=author&query=Chu%2C+X">Xianqi Chu</a>, <a href="/search/cs?searchtype=author&query=Govind%2C+Y">Yash Govind</a>, <a href="/search/cs?searchtype=author&query=Khorshidi%2C+S">Samira Khorshidi</a>, <a href="/search/cs?searchtype=author&query=Khot%2C+R">Rahul Khot</a>, <a href="/search/cs?searchtype=author&query=Luna%2C+K">Katherine Luna</a>, <a href="/search/cs?searchtype=author&query=Nikfarjam%2C+A">Azadeh Nikfarjam</a>, <a href="/search/cs?searchtype=author&query=Qi%2C+X">Xiaoguang Qi</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+F">Fei Wu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xianhan Zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yunyao Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2408.04637v1-abstract-full"> Prompt engineering is an iterative procedure that often requires extensive manual effort to formulate suitable instructions for effectively directing large language models (LLMs) in specific tasks. Incorporating few-shot examples is a vital and effective approach to providing LLMs with precise instructions, leading to improved LLM performance.
Nonetheless, identifying the most informative demonstrations for LLMs is labor-intensive, frequently entailing sifting through an extensive search space. In this demonstration, we showcase a human-in-the-loop tool called APE (Active Prompt Engineering) designed for refining prompts through active learning. Drawing inspiration from active learning, APE iteratively selects the most ambiguous examples for human feedback, which will be transformed into few-shot examples within the prompt. The demo recording can be found with the submission or viewed at https://youtu.be/OwQ6MQx53-Y. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">3 pages, Proceedings of the Fifth Workshop on Data Science with Human-in-the-Loop (DaSH 2024)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.03297">arXiv:2408.03297</a> <span> [<a href="https://arxiv.org/pdf/2408.03297">pdf</a>, <a href="https://arxiv.org/format/2408.03297">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> KnowPO: Knowledge-aware Preference Optimization for Controllable Knowledge Selection in Retrieval-Augmented Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Ruizhe Zhang</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Y">Yongxin Xu</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+Y">Yuzhen Xiao</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+R">Runchuan Zhu</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+X">Xinke Jiang</a>, <a href="/search/cs?searchtype=author&query=Chu%2C+X">Xu Chu</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+J">Junfeng Zhao</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yasha Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.03297v2-abstract-short" style="display: inline;"> By integrating external knowledge, Retrieval-Augmented Generation (RAG) has become an effective strategy for mitigating the hallucination problems that large language models (LLMs) encounter when dealing with knowledge-intensive tasks. However, in the process of integrating external non-parametric supporting evidence with internal parametric knowledge, inevitable knowledge conflicts may arise, lea… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.03297v2-abstract-full').style.display = 'inline'; document.getElementById('2408.03297v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.03297v2-abstract-full" style="display: none;"> By integrating external knowledge, Retrieval-Augmented Generation (RAG) has become an effective strategy for mitigating the hallucination problems that large language models (LLMs) encounter when dealing with knowledge-intensive tasks. However, in the process of integrating external non-parametric supporting evidence with internal parametric knowledge, inevitable knowledge conflicts may arise, leading to confusion in the model's responses. To enhance the knowledge selection of LLMs in various contexts, some research has focused on refining their behavior patterns through instruction-tuning. Nonetheless, due to the absence of explicit negative signals and comparative objectives, models fine-tuned in this manner may still exhibit undesirable behaviors such as contextual ignorance and contextual overinclusion. 
To this end, we propose a Knowledge-aware Preference Optimization strategy, dubbed KnowPO, aimed at achieving adaptive knowledge selection based on contextual relevance in real retrieval scenarios. Concretely, we propose a general paradigm for constructing knowledge-conflict datasets that comprehensively cover various error types, so that the model learns to avoid these negative signals through preference optimization. Simultaneously, we propose a rewriting strategy and a data-ratio optimization strategy to address preference imbalances. Experimental results show that KnowPO outperforms previous methods for handling knowledge conflicts by over 37\%, while also exhibiting robust generalization across various out-of-distribution datasets. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.01803">arXiv:2408.01803</a> <span> [<a href="https://arxiv.org/pdf/2408.01803">pdf</a>, <a href="https://arxiv.org/format/2408.01803">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> STBLLM: Breaking the 1-Bit Barrier with Structured Binary LLMs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Dong%2C+P">Peijie Dong</a>, <a href="/search/cs?searchtype=author&query=Li%2C+L">Lujun Li</a>, <a href="/search/cs?searchtype=author&query=Zhong%2C+Y">Yuedong Zhong</a>, <a href="/search/cs?searchtype=author&query=Du%2C+D">Dayou Du</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+R">Ruibo Fan</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yuhan Chen</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+Z">Zhenheng Tang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Q">Qiang Wang</a>, <a href="/search/cs?searchtype=author&query=Xue%2C+W">Wei Xue</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+Y">Yike Guo</a>, <a href="/search/cs?searchtype=author&query=Chu%2C+X">Xiaowen Chu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2408.01803v2-abstract-full"> In this paper, we present the first structural binarization method for LLM compression to less than 1-bit precision. Although LLMs have achieved remarkable performance, their memory-bound nature during the inference stage hinders their deployment on resource-constrained devices. Reducing weights to 1-bit precision through binarization substantially enhances computational efficiency.
We observe that some weights in binarized LLMs can be randomly flipped without significant performance degradation, suggesting the potential for further compression. To exploit this, our STBLLM employs an N:M sparsity technique to achieve structural binarization of the weights. Specifically, we introduce a novel Standardized Importance (SI) metric, which considers weight magnitude and input feature norm to more accurately assess weight significance. Then, we propose a layer-wise approach, allowing different layers of the LLM to be sparsified with varying N:M ratios, thereby balancing compression and accuracy. Furthermore, we implement a fine-grained grouping strategy for less important weights, applying distinct quantization schemes to sparse, intermediate, and dense regions. Finally, we design a specialized CUDA kernel to support structural binarization. We conduct extensive experiments on LLaMA-1/2/3, the OPT family, and Mistral to evaluate the effectiveness of STBLLM. The results demonstrate that our approach performs better than other binarized LLM compression methods while significantly reducing memory requirements. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.19998">arXiv:2407.19998</a> <span> [<a href="https://arxiv.org/pdf/2407.19998">pdf</a>, <a href="https://arxiv.org/format/2407.19998">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Do LLMs Really Adapt to Domains?
An Ontology Learning Perspective </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mai%2C+H+T">Huu Tan Mai</a>, <a href="/search/cs?searchtype=author&query=Chu%2C+C+X">Cuong Xuan Chu</a>, <a href="/search/cs?searchtype=author&query=Paulheim%2C+H">Heiko Paulheim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2407.19998v1-abstract-full"> Large Language Models (LLMs) have demonstrated unprecedented prowess across a wide range of natural language processing tasks in various application domains. Recent studies show that LLMs can be leveraged to perform lexical semantic tasks, such as Knowledge Base Completion (KBC) or Ontology Learning (OL). However, it has not effectively been verified whether their success is due to their ability to reason over unstructured or semi-structured data, or their effective learning of linguistic patterns and senses alone. This unresolved question is particularly crucial when dealing with domain-specific data, where the lexical senses and their meaning can completely differ from what an LLM has learned during its training stage. This paper investigates the following question: Do LLMs really adapt to domains and remain consistent in the extraction of structured knowledge, or do they only learn lexical senses instead of reasoning? To answer this question, we devise a controlled experiment setup that uses WordNet to synthesize parallel corpora, with English and gibberish terms. We examine the differences in the outputs of LLMs for each corpus in two OL tasks: relation extraction and taxonomy discovery. Empirical results show that, while adapting to the gibberish corpora, off-the-shelf LLMs do not consistently reason over semantic relationships between concepts, and instead leverage senses and their frame. However, fine-tuning improves the performance of LLMs on lexical semantic tasks even when the domain-specific terms are arbitrary and unseen during pre-training, hinting at the applicability of pre-trained LLMs for OL. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at ISWC 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.18357">arXiv:2407.18357</a> <span> [<a href="https://arxiv.org/pdf/2407.18357">pdf</a>, <a href="https://arxiv.org/format/2407.18357">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Needle Segmentation Using GAN: Restoring Thin Instrument Visibility in Robotic Ultrasound </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jiang%2C+Z">Zhongliang Jiang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xuesong Li</a>, <a href="/search/cs?searchtype=author&query=Chu%2C+X">Xiangyu Chu</a>, <a href="/search/cs?searchtype=author&query=Karlas%2C+A">Angelos Karlas</a>, <a href="/search/cs?searchtype=author&query=Bi%2C+Y">Yuan Bi</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+Y">Yingsheng Cheng</a>, <a href="/search/cs?searchtype=author&query=Au%2C+K+W+S">K. W. Samuel Au</a>, <a href="/search/cs?searchtype=author&query=Navab%2C+N">Nassir Navab</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.18357v1-abstract-short" style="display: inline;"> Ultrasound-guided percutaneous needle insertion is a standard procedure employed in both biopsy and ablation in clinical practices. However, due to the complex interaction between tissue and instrument, the needle may deviate from the in-plane view, resulting in a lack of close monitoring of the percutaneous needle. To address this challenge, we introduce a robot-assisted ultrasound (US) imaging s… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.18357v1-abstract-full').style.display = 'inline'; document.getElementById('2407.18357v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.18357v1-abstract-full" style="display: none;"> Ultrasound-guided percutaneous needle insertion is a standard procedure employed in both biopsy and ablation in clinical practices. However, due to the complex interaction between tissue and instrument, the needle may deviate from the in-plane view, resulting in a lack of close monitoring of the percutaneous needle. To address this challenge, we introduce a robot-assisted ultrasound (US) imaging system designed to seamlessly monitor the insertion process and autonomously restore the visibility of the inserted instrument when misalignment happens. To this end, the adversarial structure is presented to encourage the generation of segmentation masks that align consistently with the ground truth in high-order space. This study also systematically investigates the effects on segmentation performance by exploring various training loss functions and their combinations. When misalignment between the probe and the percutaneous needle is detected, the robot is triggered to perform transverse searching to optimize the positional and rotational adjustment to restore needle visibility. 
The experimental results on ex-vivo porcine samples demonstrate that the proposed method can precisely segment the percutaneous needle (with a tip error of $0.37\pm0.29mm$ and an angle error of $1.19\pm 0.29^{\circ}$). Furthermore, the needle appearance can be successfully restored under the repositioned probe pose in all 45 trials, with repositioning errors of $1.51\pm0.95mm$ and $1.25\pm0.79^{\circ}$. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">accepted by IEEE TIM. code: https://github.com/noseefood/NeedleSegmentation-GAN; video: https://youtu.be/4WuEP9PACs0</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.17398">arXiv:2407.17398</a> <span> [<a href="https://arxiv.org/pdf/2407.17398">pdf</a>, <a href="https://arxiv.org/format/2407.17398">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> 3D Question Answering for City Scene Understanding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sun%2C+P">Penglei Sun</a>, <a href="/search/cs?searchtype=author&query=Song%2C+Y">Yaoxian Song</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xiang Liu</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+X">Xiaofei Yang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Q">Qiang Wang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+T">Tiefeng Li</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Y">Yang Yang</a>, <a href="/search/cs?searchtype=author&query=Chu%2C+X">Xiaowen Chu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2407.17398v1-abstract-full"> 3D multimodal question answering (MQA) plays a crucial role in scene understanding by enabling intelligent agents to comprehend their surroundings in 3D environments.
While existing research has primarily focused on indoor household tasks and outdoor roadside autonomous driving tasks, there has been limited exploration of city-level scene understanding tasks. Furthermore, existing research faces challenges in understanding city scenes, due to the absence of spatial semantic information and human-environment interaction information at the city level. To address these challenges, we investigate 3D MQA from both dataset and method perspectives. From the dataset perspective, we introduce a novel 3D MQA dataset named City-3DQA for city-level scene understanding, which is the first dataset to incorporate scene semantics and human-environment interaction tasks within a city. From the method perspective, we propose a Scene graph enhanced City-level Understanding method (Sg-CityU), which utilizes the scene graph to introduce spatial semantics. A new benchmark is reported, and our proposed Sg-CityU achieves accuracies of 63.94% and 63.76% in different settings of City-3DQA. Compared to indoor 3D MQA methods and zero-shot approaches using advanced large language models (LLMs), Sg-CityU demonstrates state-of-the-art (SOTA) performance in robustness and generalization. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.17398v1-abstract-full').style.display = 'none'; document.getElementById('2407.17398v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.14923">arXiv:2407.14923</a> <span> [<a href="https://arxiv.org/pdf/2407.14923">pdf</a>, <a href="https://arxiv.org/format/2407.14923">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> RayFormer: Improving Query-Based Multi-Camera 3D Object Detection via Ray-Centric Strategies </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chu%2C+X">Xiaomeng Chu</a>, <a href="/search/cs?searchtype=author&query=Deng%2C+J">Jiajun Deng</a>, <a href="/search/cs?searchtype=author&query=You%2C+G">Guoliang You</a>, <a href="/search/cs?searchtype=author&query=Duan%2C+Y">Yifan Duan</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yao Li</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yanyong Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.14923v4-abstract-short" style="display: inline;"> The recent advances in query-based multi-camera 3D object detection are characterized by initializing object queries in the 3D space, and then sampling features from perspective-view images to perform multi-round query refinement.
In such a framework, query points near the same camera ray are likely to sample similar features from very close pixels, resulting in ambiguous query features and degraded de… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.14923v4-abstract-full').style.display = 'inline'; document.getElementById('2407.14923v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.14923v4-abstract-full" style="display: none;"> The recent advances in query-based multi-camera 3D object detection are characterized by initializing object queries in the 3D space, and then sampling features from perspective-view images to perform multi-round query refinement. In such a framework, query points near the same camera ray are likely to sample similar features from very close pixels, resulting in ambiguous query features and degraded detection accuracy. To this end, we introduce RayFormer, a camera-ray-inspired query-based 3D object detector that aligns the initialization and feature extraction of object queries with the optical characteristics of cameras. Specifically, RayFormer transforms perspective-view image features into bird's eye view (BEV) via the lift-splat-shoot method and segments the BEV map into sectors based on the camera rays. Object queries are uniformly and sparsely initialized along each camera ray, facilitating the projection of different queries onto different areas in the image to extract distinct features. In addition, we leverage the instance information of images to supplement the uniformly initialized object queries by further involving additional queries along the ray from 2D object detection boxes. To extract unique object-level features that cater to distinct queries, we design a ray sampling method that suitably organizes the distribution of feature sampling points on both images and bird's eye view. Extensive experiments are conducted on the nuScenes dataset to validate our proposed ray-inspired model design. The proposed RayFormer achieves superior performance of 55.5% mAP and 63.3% NDS. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.14923v4-abstract-full').style.display = 'none'; document.getElementById('2407.14923v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024.
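</p> <p class="is-size-7">The ray-centric query initialization that the RayFormer abstract describes can be pictured with a short sketch. Below is a minimal NumPy illustration; the function name, ray count, and ranges are assumptions for illustration, not the authors' code.</p> <pre><code class="language-python">
# Hypothetical sketch of placing object queries uniformly and sparsely
# along camera rays in a bird's-eye-view (BEV) plane. Not the authors'
# implementation; all shapes and defaults are assumed.
import numpy as np

def init_ray_queries(num_rays=6, queries_per_ray=8, max_range=50.0):
    """Return BEV (x, y) query points, one group of points per ray."""
    angles = np.linspace(0.0, 2.0 * np.pi, num_rays, endpoint=False)
    radii = np.linspace(2.0, max_range, queries_per_ray)
    xs = np.outer(np.cos(angles), radii).reshape(-1)
    ys = np.outer(np.sin(angles), radii).reshape(-1)
    return np.stack([xs, ys], axis=1)

queries = init_ray_queries()
print(queries.shape)  # (48, 2): num_rays * queries_per_ray points
</code></pre> <p class="is-size-7">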
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ACM Multimedia 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.14302">arXiv:2407.14302</a> <span> [<a href="https://arxiv.org/pdf/2407.14302">pdf</a>, <a href="https://arxiv.org/format/2407.14302">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Dyn-Adapter: Towards Disentangled Representation for Efficient Visual Recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yurong Zhang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+H">Honghao Chen</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xinyu Zhang</a>, <a href="/search/cs?searchtype=author&query=Chu%2C+X">Xiangxiang Chu</a>, <a href="/search/cs?searchtype=author&query=Song%2C+L">Li Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.14302v2-abstract-short" style="display: inline;"> Parameter-efficient transfer learning (PETL) is a promising task, aiming to adapt the large-scale pre-trained model to downstream tasks with a relatively modest cost. However, current PETL methods struggle in compressing computational complexity and bear a heavy inference burden due to the complete forward process. This paper presents an efficient visual recognition paradigm, called Dynamic Adapte… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.14302v2-abstract-full').style.display = 'inline'; document.getElementById('2407.14302v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.14302v2-abstract-full" style="display: none;"> Parameter-efficient transfer learning (PETL) is a promising task, aiming to adapt the large-scale pre-trained model to downstream tasks with a relatively modest cost. However, current PETL methods struggle in compressing computational complexity and bear a heavy inference burden due to the complete forward process. This paper presents an efficient visual recognition paradigm, called Dynamic Adapter (Dyn-Adapter), that boosts PETL efficiency by subtly disentangling features in multiple levels. Our approach is simple: first, we devise a dynamic architecture with balanced early heads for multi-level feature extraction, along with adaptive training strategy. Second, we introduce a bidirectional sparsity strategy driven by the pursuit of powerful generalization ability. These qualities enable us to fine-tune efficiently and effectively: we reduce FLOPs during inference by 50%, while maintaining or even yielding higher recognition accuracy. Extensive experiments on diverse datasets and pretrained backbones demonstrate the potential of Dyn-Adapter serving as a general efficiency booster for PETL in vision recognition tasks. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.14302v2-abstract-full').style.display = 'none'; document.getElementById('2407.14302v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ECCV 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.13996">arXiv:2407.13996</a> <span> [<a href="https://arxiv.org/pdf/2407.13996">pdf</a>, <a href="https://arxiv.org/format/2407.13996">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Performance">cs.PF</span> </div> </div> <p class="title is-5 mathjax"> Missile: Fine-Grained, Hardware-Level GPU Resource Isolation for Multi-Tenant DNN Inference </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yongkang Zhang</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+H">Haoxuan Yu</a>, <a href="/search/cs?searchtype=author&query=Han%2C+C">Chenxia Han</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+C">Cheng Wang</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+B">Baotong Lu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yang Li</a>, <a href="/search/cs?searchtype=author&query=Chu%2C+X">Xiaowen Chu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+H">Huaicheng Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.13996v2-abstract-short" style="display: inline;"> Colocating high-priority, latency-sensitive (LS) and low-priority, best-effort (BE) DNN inference services reduces the total cost of ownership (TCO) of GPU clusters. Limited by bottlenecks such as VRAM channel conflicts and PCIe bus contentions, existing GPU sharing solutions are unable to avoid resource conflicts among concurrently executing tasks, failing to achieve both low latency for LS tasks… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.13996v2-abstract-full').style.display = 'inline'; document.getElementById('2407.13996v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.13996v2-abstract-full" style="display: none;"> Colocating high-priority, latency-sensitive (LS) and low-priority, best-effort (BE) DNN inference services reduces the total cost of ownership (TCO) of GPU clusters. 
Limited by bottlenecks such as VRAM channel conflicts and PCIe bus contentions, existing GPU sharing solutions are unable to avoid resource conflicts among concurrently executing tasks, failing to achieve both low latency for LS tasks and high throughput for BE tasks. To bridge this gap, this paper presents Missile, a general GPU sharing solution for multi-tenant DNN inference on NVIDIA GPUs. Missile approximates fine-grained GPU hardware resource isolation between multiple LS and BE DNN tasks at the software level. Through comprehensive reverse engineering, Missile first reveals a general VRAM channel hash mapping architecture of NVIDIA GPUs and eliminates VRAM channel conflicts using software-level cache coloring. It also isolates the PCIe bus and fairly allocates PCIe bandwidth using a completely fair scheduler. We evaluate 12 mainstream DNNs with synthetic and real-world workloads on four GPUs. The results show that compared to state-of-the-art GPU sharing solutions, Missile reduces tail latency for LS services by up to ~50%, achieves up to 6.1x BE job throughput, and allocates PCIe bus bandwidth to tenants on-demand for optimal performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.13996v2-abstract-full').style.display = 'none'; document.getElementById('2407.13996v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">18 pages, 18 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> D.4.9; I.2.5 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.11644">arXiv:2407.11644</a> <span> [<a href="https://arxiv.org/pdf/2407.11644">pdf</a>, <a href="https://arxiv.org/format/2407.11644">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Perception Helps Planning: Facilitating Multi-Stage Lane-Level Integration via Double-Edge Structures </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=You%2C+G">Guoliang You</a>, <a href="/search/cs?searchtype=author&query=Chu%2C+X">Xiaomeng Chu</a>, <a href="/search/cs?searchtype=author&query=Duan%2C+Y">Yifan Duan</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+W">Wenyu Zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xingchen Li</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Sha Zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yao Li</a>, <a href="/search/cs?searchtype=author&query=Ji%2C+J">Jianmin Ji</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yanyong Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short
has-text-grey-dark mathjax" id="2407.11644v1-abstract-short" style="display: inline;"> When planning for autonomous driving, it is crucial to consider essential traffic elements such as lanes, intersections, traffic regulations, and dynamic agents. However, they are often overlooked by the traditional end-to-end planning methods, likely leading to inefficiencies and non-compliance with traffic regulations. In this work, we endeavor to integrate the perception of these elements into… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.11644v1-abstract-full').style.display = 'inline'; document.getElementById('2407.11644v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.11644v1-abstract-full" style="display: none;"> When planning for autonomous driving, it is crucial to consider essential traffic elements such as lanes, intersections, traffic regulations, and dynamic agents. However, they are often overlooked by the traditional end-to-end planning methods, likely leading to inefficiencies and non-compliance with traffic regulations. In this work, we endeavor to integrate the perception of these elements into the planning task. To this end, we propose Perception Helps Planning (PHP), a novel framework that reconciles lane-level planning with perception. This integration ensures that planning is inherently aligned with traffic constraints, thus facilitating safe and efficient driving. Specifically, PHP focuses on both edges of a lane for planning and perception purposes, taking into consideration the 3D positions of both lane edges and attributes for lane intersections, lane directions, lane occupancy, and planning. In the algorithmic design, the process begins with the transformer encoding multi-camera images to extract the above features and predicting lane-level perception results. Next, the hierarchical feature early fusion module refines the features for predicting planning attributes. Finally, the double-edge interpreter utilizes a late-fusion process specifically designed to integrate lane-level perception and planning information, culminating in the generation of vehicle control signals. Experiments on three Carla benchmarks show significant improvements in driving score of 27.20%, 33.47%, and 15.54% over existing algorithms, respectively, achieving the state-of-the-art performance, with the system operating up to 22.57 FPS. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.11644v1-abstract-full').style.display = 'none'; document.getElementById('2407.11644v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. 
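</p> <p class="is-size-7">As a rough picture of the double-edge late-fusion step sketched in the PHP abstract above, the toy fragment below concatenates per-edge perception features with planning features before a linear head; every dimension and name here is a hypothetical placeholder, not the authors' design.</p> <pre><code class="language-python">
# Hypothetical sketch of late fusion over lane-edge perception features
# and planning features; dimensions and the linear head are assumed.
import numpy as np

rng = np.random.default_rng(0)
left_edge = rng.normal(size=(32,))   # features for the left lane edge
right_edge = rng.normal(size=(32,))  # features for the right lane edge
plan_feat = rng.normal(size=(64,))   # planning attributes for the lane

fused = np.concatenate([left_edge, right_edge, plan_feat])  # late fusion
w = rng.normal(size=(3, fused.shape[0])) * 0.01  # toy control head
steer, throttle, brake = (w @ fused).tolist()    # vehicle control signals
print(round(steer, 3), round(throttle, 3), round(brake, 3))
</code></pre> <p class="is-size-7">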
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.08972">arXiv:2407.08972</a> <span> [<a href="https://arxiv.org/pdf/2407.08972">pdf</a>, <a href="https://arxiv.org/format/2407.08972">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Revealing the Dark Secrets of Extremely Large Kernel ConvNets on Robustness </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+H">Honghao Chen</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yurong Zhang</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+X">Xiaokun Feng</a>, <a href="/search/cs?searchtype=author&query=Chu%2C+X">Xiangxiang Chu</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+K">Kaiqi Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.08972v1-abstract-short" style="display: inline;"> Robustness is a vital aspect to consider when deploying deep learning models into the wild. Numerous studies have been dedicated to the study of the robustness of vision transformers (ViTs), which have dominated as the mainstream backbone choice for vision tasks since the dawn of 2020s. Recently, some large kernel convnets make a comeback with impressive performance and efficiency. However, it sti… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.08972v1-abstract-full').style.display = 'inline'; document.getElementById('2407.08972v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.08972v1-abstract-full" style="display: none;"> Robustness is a vital aspect to consider when deploying deep learning models into the wild. Numerous studies have been dedicated to the study of the robustness of vision transformers (ViTs), which have dominated as the mainstream backbone choice for vision tasks since the dawn of 2020s. Recently, some large kernel convnets make a comeback with impressive performance and efficiency. However, it still remains unclear whether large kernel networks are robust and the attribution of their robustness. In this paper, we first conduct a comprehensive evaluation of large kernel convnets' robustness and their differences from typical small kernel counterparts and ViTs on six diverse robustness benchmark datasets. Then to analyze the underlying factors behind their strong robustness, we design experiments from both quantitative and qualitative perspectives to reveal large kernel convnets' intriguing properties that are completely different from typical convnets. Our experiments demonstrate for the first time that pure CNNs can achieve exceptional robustness comparable or even superior to that of ViTs. Our analysis on occlusion invariance, kernel attention patterns and frequency characteristics provide novel insights into the source of robustness. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.08972v1-abstract-full').style.display = 'none'; document.getElementById('2407.08972v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.07639">arXiv:2407.07639</a> <span> [<a href="https://arxiv.org/pdf/2407.07639">pdf</a>, <a href="https://arxiv.org/format/2407.07639">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Explaining Graph Neural Networks for Node Similarity on Graphs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Daza%2C+D">Daniel Daza</a>, <a href="/search/cs?searchtype=author&query=Chu%2C+C+X">Cuong Xuan Chu</a>, <a href="/search/cs?searchtype=author&query=Tran%2C+T">Trung-Kien Tran</a>, <a href="/search/cs?searchtype=author&query=Stepanova%2C+D">Daria Stepanova</a>, <a href="/search/cs?searchtype=author&query=Cochez%2C+M">Michael Cochez</a>, <a href="/search/cs?searchtype=author&query=Groth%2C+P">Paul Groth</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.07639v1-abstract-short" style="display: inline;"> Similarity search is a fundamental task for exploiting information in various applications dealing with graph data, such as citation networks or knowledge graphs. While this task has been intensively approached from heuristics to graph embeddings and graph neural networks (GNNs), providing explanations for similarity has received less attention. In this work we are concerned with explainable simil… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.07639v1-abstract-full').style.display = 'inline'; document.getElementById('2407.07639v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.07639v1-abstract-full" style="display: none;"> Similarity search is a fundamental task for exploiting information in various applications dealing with graph data, such as citation networks or knowledge graphs. While this task has been intensively approached from heuristics to graph embeddings and graph neural networks (GNNs), providing explanations for similarity has received less attention. In this work we are concerned with explainable similarity search over graphs, by investigating how GNN-based methods for computing node similarities can be augmented with explanations. Specifically, we evaluate the performance of two prominent approaches towards explanations in GNNs, based on the concepts of mutual information (MI), and gradient-based explanations (GB). We discuss their suitability and empirically validate the properties of their explanations over different popular graph benchmarks. We find that unlike MI explanations, gradient-based explanations have three desirable properties. 
First, they are actionable: selecting inputs depending on them results in predictable changes in similarity scores. Second, they are consistent: the effect of selecting certain inputs overlaps very little with the effect of discarding them. Third, they can be pruned significantly to obtain sparse explanations that retain the effect on similarity scores. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.07639v1-abstract-full').style.display = 'none'; document.getElementById('2407.07639v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.06151">arXiv:2407.06151</a> <span> [<a href="https://arxiv.org/pdf/2407.06151">pdf</a>, <a href="https://arxiv.org/format/2407.06151">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> </div> </div> <p class="title is-5 mathjax"> Auto-PICNN: Automated machine learning for physics-informed convolutional neural networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhou%2C+W">Wanyun Zhou</a>, <a href="/search/cs?searchtype=author&query=Chu%2C+X">Xiaowen Chu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.06151v1-abstract-short" style="display: inline;"> Recent advances in deep learning for solving partial differential equations (PDEs) have introduced physics-informed neural networks (PINNs), which integrate machine learning with physical laws. Physics-informed convolutional neural networks (PICNNs) extend PINNs by leveraging CNNs for enhanced generalization and efficiency. However, current PICNNs depend on manual design, and inappropriate designs… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.06151v1-abstract-full').style.display = 'inline'; document.getElementById('2407.06151v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.06151v1-abstract-full" style="display: none;"> Recent advances in deep learning for solving partial differential equations (PDEs) have introduced physics-informed neural networks (PINNs), which integrate machine learning with physical laws. Physics-informed convolutional neural networks (PICNNs) extend PINNs by leveraging CNNs for enhanced generalization and efficiency. However, current PICNNs depend on manual design, and inappropriate designs may not effectively solve PDEs. Furthermore, due to the diversity of physical problems, the ideal network architectures and loss functions vary across different PDEs. It is impractical to find the optimal PICNN architecture and loss function for each specific physical problem through extensive manual experimentation. To surmount these challenges, this paper uses automated machine learning (AutoML) to automatically and efficiently search for the loss functions and network architectures of PICNNs. 
We introduce novel search spaces for loss functions and network architectures and propose a two-stage search strategy. The first stage focuses on searching for factors and residual adjustment operations that influence the loss function, while the second stage aims to find the best CNN architecture. Experimental results show that our automatic searching method significantly outperforms the manually-designed model on multiple datasets. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.06151v1-abstract-full').style.display = 'none'; document.getElementById('2407.06151v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.02846">arXiv:2407.02846</a> <span> [<a href="https://arxiv.org/pdf/2407.02846">pdf</a>, <a href="https://arxiv.org/format/2407.02846">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Multi-Task Domain Adaptation for Language Grounding with 3D Objects </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sun%2C+P">Penglei Sun</a>, <a href="/search/cs?searchtype=author&query=Song%2C+Y">Yaoxian Song</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+X">Xinglin Pan</a>, <a href="/search/cs?searchtype=author&query=Dong%2C+P">Peijie Dong</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+X">Xiaofei Yang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Q">Qiang Wang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zhixu Li</a>, <a href="/search/cs?searchtype=author&query=Li%2C+T">Tiefeng Li</a>, <a href="/search/cs?searchtype=author&query=Chu%2C+X">Xiaowen Chu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.02846v2-abstract-short" style="display: inline;"> The existing works on object-level language grounding with 3D objects mostly focus on improving performance by utilizing the off-the-shelf pre-trained models to capture features, such as viewpoint selection or geometric priors. However, they have failed to consider exploring the cross-modal representation of language-vision alignment in the cross-domain field. To answer this problem, we propose a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.02846v2-abstract-full').style.display = 'inline'; document.getElementById('2407.02846v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.02846v2-abstract-full" style="display: none;"> The existing works on object-level language grounding with 3D objects mostly focus on improving performance by utilizing the off-the-shelf pre-trained models to capture features, such as viewpoint selection or geometric priors. However, they have failed to consider exploring the cross-modal representation of language-vision alignment in the cross-domain field. 
To address this problem, we propose a novel method called Domain Adaptation for Language Grounding (DA4LG) with 3D objects. Specifically, the proposed DA4LG consists of a visual adapter module with multi-task learning to realize vision-language alignment by comprehensive multimodal feature representation. Experimental results demonstrate that DA4LG performs competitively across visual and non-visual language descriptions, independent of the completeness of observation. DA4LG achieves state-of-the-art performance in the single-view and multi-view settings, with accuracies of 83.8% and 86.8% respectively, on the language grounding benchmark SNARE. The simulation experiments show the practicality and strong generalization of DA4LG compared to existing methods. Our project is available at https://sites.google.com/view/da4lg. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.02846v2-abstract-full').style.display = 'none'; document.getElementById('2407.02846v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.02516">arXiv:2407.02516</a> <span> [<a href="https://arxiv.org/pdf/2407.02516">pdf</a>, <a href="https://arxiv.org/format/2407.02516">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> EditFollower: Tunable Car Following Models for Customizable Adaptive Cruise Control Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xianda Chen</a>, <a href="/search/cs?searchtype=author&query=Han%2C+X">Xu Han</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+M">Meixin Zhu</a>, <a href="/search/cs?searchtype=author&query=Chu%2C+X">Xiaowen Chu</a>, <a href="/search/cs?searchtype=author&query=Tiu%2C+P">PakHin Tiu</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+X">Xinhu Zheng</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yinhai Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.02516v1-abstract-short" style="display: inline;"> In the realm of driving technologies, fully autonomous vehicles have not been widely adopted yet, making advanced driver assistance systems (ADAS) crucial for enhancing driving experiences. Adaptive Cruise Control (ACC) emerges as a pivotal component of ADAS.
However, current ACC systems often employ fixed settings, failing to intuitively capture drivers' social preferences and leading to potentia… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.02516v1-abstract-full').style.display = 'inline'; document.getElementById('2407.02516v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.02516v1-abstract-full" style="display: none;"> In the realm of driving technologies, fully autonomous vehicles have not been widely adopted yet, making advanced driver assistance systems (ADAS) crucial for enhancing driving experiences. Adaptive Cruise Control (ACC) emerges as a pivotal component of ADAS. However, current ACC systems often employ fixed settings, failing to intuitively capture drivers' social preferences and leading to potential function disengagement. To overcome these limitations, we propose the Editable Behavior Generation (EBG) model, a data-driven car-following model that allows for adjusting driving discourtesy levels. The framework integrates diverse courtesy calculation methods into long short-term memory (LSTM) and Transformer architectures, offering a comprehensive approach to capture nuanced driving dynamics. By integrating various discourtesy values during the training process, our model generates realistic agent trajectories with different levels of courtesy in car-following behavior. Experimental results on the HighD and Waymo datasets showcase a reduction in Mean Squared Error (MSE) of spacing and MSE of speed compared to baselines, establishing style controllability. To the best of our knowledge, this work represents the first data-driven car-following model capable of dynamically adjusting discourtesy levels. Our model provides valuable insights for the development of ACC systems that take into account drivers' social preferences. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.02516v1-abstract-full').style.display = 'none'; document.getElementById('2407.02516v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. 
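</p> <p class="is-size-7">The discourtesy-conditioned car-following idea in the EBG abstract above can be sketched as a recurrent model that receives the courtesy level as an extra input feature. The PyTorch fragment below is a minimal sketch under assumed dimensions and names, not the authors' model.</p> <pre><code class="language-python">
# Hypothetical sketch: an LSTM car-following model conditioned on a
# tunable discourtesy scalar. All names and sizes are assumed.
import torch
from torch import nn

class ToyEBG(nn.Module):
    def __init__(self, state_dim=4, hidden=64):
        super().__init__()
        # One extra input feature carries the discourtesy level.
        self.lstm = nn.LSTM(state_dim + 1, hidden, batch_first=True)
        self.head = nn.Linear(hidden, 1)  # next-step acceleration

    def forward(self, states, discourtesy):
        # states: (batch, time, state_dim); discourtesy: (batch,)
        level = discourtesy.view(-1, 1, 1).expand(-1, states.size(1), 1)
        out, _ = self.lstm(torch.cat([states, level], dim=-1))
        return self.head(out[:, -1])

model = ToyEBG()
acc = model(torch.randn(2, 10, 4), torch.tensor([0.1, 0.9]))
print(acc.shape)  # torch.Size([2, 1])
</code></pre> <p class="is-size-7">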
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.00599">arXiv:2407.00599</a> <span> [<a href="https://arxiv.org/pdf/2407.00599">pdf</a>, <a href="https://arxiv.org/format/2407.00599">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Parm: Efficient Training of Large Sparsely-Activated Models with Dedicated Schedules </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Pan%2C+X">Xinglin Pan</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+W">Wenxiang Lin</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+S">Shaohuai Shi</a>, <a href="/search/cs?searchtype=author&query=Chu%2C+X">Xiaowen Chu</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+W">Weinong Sun</a>, <a href="/search/cs?searchtype=author&query=Li%2C+B">Bo Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.00599v2-abstract-short" style="display: inline;"> Sparsely-activated Mixture-of-Expert (MoE) layers have found practical applications in enlarging the model size of large-scale foundation models, with only a sub-linear increase in computation demands. Despite the wide adoption of hybrid parallel paradigms like model parallelism, expert parallelism, and expert-sharding parallelism (i.e., MP+EP+ESP) to support MoE model training on GPU clusters, th… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.00599v2-abstract-full').style.display = 'inline'; document.getElementById('2407.00599v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.00599v2-abstract-full" style="display: none;"> Sparsely-activated Mixture-of-Expert (MoE) layers have found practical applications in enlarging the model size of large-scale foundation models, with only a sub-linear increase in computation demands. Despite the wide adoption of hybrid parallel paradigms like model parallelism, expert parallelism, and expert-sharding parallelism (i.e., MP+EP+ESP) to support MoE model training on GPU clusters, the training efficiency is hindered by communication costs introduced by these parallel paradigms. To address this limitation, we propose Parm, a system that accelerates MP+EP+ESP training by designing two dedicated schedules for placing communication tasks. The proposed schedules eliminate redundant computations and communications and enable overlaps between intra-node and inter-node communications, ultimately reducing the overall training time. As the two schedules are not mutually exclusive, we provide comprehensive theoretical analyses and derive an automatic and accurate solution to determine which schedule should be applied in different scenarios. Experimental results on an 8-GPU server and a 32-GPU cluster demonstrate that Parm outperforms the state-of-the-art MoE training system, DeepSpeed-MoE, achieving 1.13$\times$ to 5.77$\times$ speedup on 1296 manually configured MoE layers and approximately 3$\times$ improvement on two real-world MoE models based on BERT and GPT-2. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.00599v2-abstract-full').style.display = 'none'; document.getElementById('2407.00599v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 30 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.18181">arXiv:2406.18181</a> <span> [<a href="https://arxiv.org/pdf/2406.18181">pdf</a>, <a href="https://arxiv.org/ps/2406.18181">ps</a>, <a href="https://arxiv.org/format/2406.18181">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> On the Evaluation of Large Language Models in Unit Test Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yang%2C+L">Lin Yang</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+C">Chen Yang</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+S">Shutao Gao</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+W">Weijing Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+B">Bo Wang</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Q">Qihao Zhu</a>, <a href="/search/cs?searchtype=author&query=Chu%2C+X">Xiao Chu</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+J">Jianyi Zhou</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+G">Guangtai Liang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Q">Qianxiang Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+J">Junjie Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.18181v2-abstract-short" style="display: inline;"> Unit testing is an essential activity in software development for verifying the correctness of software components. However, manually writing unit tests is challenging and time-consuming. The emergence of Large Language Models (LLMs) offers a new direction for automating unit test generation. Existing research primarily focuses on closed-source LLMs (e.g., ChatGPT and CodeX) with fixed prompting s… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.18181v2-abstract-full').style.display = 'inline'; document.getElementById('2406.18181v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.18181v2-abstract-full" style="display: none;"> Unit testing is an essential activity in software development for verifying the correctness of software components. However, manually writing unit tests is challenging and time-consuming. The emergence of Large Language Models (LLMs) offers a new direction for automating unit test generation. Existing research primarily focuses on closed-source LLMs (e.g., ChatGPT and CodeX) with fixed prompting strategies, leaving the capabilities of advanced open-source LLMs with various prompting settings unexplored. 
Particularly, open-source LLMs offer advantages in data privacy protection and have demonstrated superior performance in some tasks. Moreover, effective prompting is crucial for maximizing LLMs' capabilities. In this paper, we conduct the first empirical study to fill this gap, based on 17 Java projects, five widely-used open-source LLMs with different structures and parameter sizes, and comprehensive evaluation metrics. Our findings highlight the significant influence of various prompt factors, show the performance of open-source LLMs compared to the commercial GPT-4 and the traditional Evosuite, and identify limitations in LLM-based unit test generation. We then derive a series of implications from our study to guide future research and practical use of LLM-based unit test generation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.18181v2-abstract-full').style.display = 'none'; document.getElementById('2406.18181v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ASE 2024, Research Paper Track</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.10540">arXiv:2406.10540</a> <span> [<a href="https://arxiv.org/pdf/2406.10540">pdf</a>, <a href="https://arxiv.org/format/2406.10540">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Generating and Evolving Reward Functions for Highway Driving with Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Han%2C+X">Xu Han</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Q">Qiannan Yang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xianda Chen</a>, <a href="/search/cs?searchtype=author&query=Chu%2C+X">Xiaowen Chu</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+M">Meixin Zhu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.10540v1-abstract-short" style="display: inline;"> Reinforcement Learning (RL) plays a crucial role in advancing autonomous driving technologies by maximizing reward functions to achieve the optimal policy. However, crafting these reward functions has been a complex, manual process in many practices. 
To reduce this complexity, we introduce a novel framework that integrates Large Language Models (LLMs) with RL to improve reward function design in a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.10540v1-abstract-full').style.display = 'inline'; document.getElementById('2406.10540v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.10540v1-abstract-full" style="display: none;"> Reinforcement Learning (RL) plays a crucial role in advancing autonomous driving technologies by maximizing reward functions to achieve the optimal policy. However, crafting these reward functions has been a complex, manual process in many practices. To reduce this complexity, we introduce a novel framework that integrates Large Language Models (LLMs) with RL to improve reward function design in autonomous driving. This framework utilizes the coding capabilities of LLMs, proven in other areas, to generate and evolve reward functions for highway scenarios. The framework starts with instructing LLMs to create an initial reward function code based on the driving environment and task descriptions. This code is then refined through iterative cycles involving RL training and LLMs' reflection, which benefits from their ability to review and improve the output. We have also developed a specific prompt template to improve LLMs' understanding of complex driving simulations, ensuring the generation of effective and error-free code. Our experiments in a highway driving simulator across three traffic configurations show that our method surpasses expert handcrafted reward functions, achieving a 22% higher average success rate. This not only indicates safer driving but also suggests significant gains in development productivity. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.10540v1-abstract-full').style.display = 'none'; document.getElementById('2406.10540v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. 
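</p> <p class="is-size-7">The generate-train-reflect loop described in the abstract above can be outlined in a few lines. This is a hedged sketch with stand-in helpers; no real LLM or simulator API is assumed.</p> <pre><code class="language-python">
# Hypothetical outline of iteratively generating and evolving a reward
# function with an LLM. Every helper here is a stand-in.
def train_and_evaluate(reward_code):
    # Stand-in for RL training in a highway simulator; returns a
    # success rate in [0, 1] plus textual feedback for reflection.
    return 0.5, "agent brakes too often near merges"

def evolve_rewards(llm, task_prompt, rounds=3):
    prompt = task_prompt
    best_code, best_score = None, -1.0
    for _ in range(rounds):
        code = llm(prompt)  # LLM emits reward-function source code
        score, feedback = train_and_evaluate(code)
        if score > best_score:
            best_code, best_score = code, score
        # Reflection: feed results back so the LLM can revise the code.
        prompt = task_prompt + "\nPrevious attempt feedback: " + feedback
    return best_code, best_score

stub_llm = lambda p: "def reward(obs): return 1.0"  # trivial stand-in
code, score = evolve_rewards(stub_llm, "Write a highway driving reward.")
print(score)
</code></pre> <p class="is-size-7">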
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">7 pages, 6 figures</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Chu%2C+X&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Chu%2C+X&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Chu%2C+X&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Chu%2C+X&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Chu%2C+X&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Chu%2C+X&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li> <a href="/search/?searchtype=author&query=Chu%2C+X&start=250" class="pagination-link " aria-label="Page 6" aria-current="page">6 </a> </li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div 
class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>